From 86a9909c577bdd641e7483f56ffbec998ea421eb Mon Sep 17 00:00:00 2001
From: Andrew McDermott
Date: Wed, 12 Feb 2014 16:59:54 +0000
Subject: Initial import

Signed-off-by: Andrew McDermott
---
 aarch64/bin/container-executor | Bin 0 -> 164559 bytes
 aarch64/bin/hadoop | 136 +
 aarch64/bin/hadoop.cmd | 240 +
 aarch64/bin/hdfs | 203 +
 aarch64/bin/hdfs.cmd | 171 +
 aarch64/bin/mapred | 148 +
 aarch64/bin/mapred.cmd | 195 +
 aarch64/bin/rcc | 61 +
 aarch64/bin/test-container-executor | Bin 0 -> 222809 bytes
 aarch64/bin/yarn | 235 +
 aarch64/bin/yarn.cmd | 254 +
 aarch64/etc/hadoop/capacity-scheduler.xml | 111 +
 aarch64/etc/hadoop/configuration.xsl | 40 +
 aarch64/etc/hadoop/container-executor.cfg | 4 +
 aarch64/etc/hadoop/core-site.xml | 20 +
 aarch64/etc/hadoop/hadoop-env.cmd | 81 +
 aarch64/etc/hadoop/hadoop-env.sh | 77 +
 aarch64/etc/hadoop/hadoop-metrics.properties | 75 +
 aarch64/etc/hadoop/hadoop-metrics2.properties | 44 +
 aarch64/etc/hadoop/hadoop-policy.xml | 219 +
 aarch64/etc/hadoop/hdfs-site.xml | 21 +
 aarch64/etc/hadoop/httpfs-env.sh | 41 +
 aarch64/etc/hadoop/httpfs-log4j.properties | 35 +
 aarch64/etc/hadoop/httpfs-signature.secret | 1 +
 aarch64/etc/hadoop/httpfs-site.xml | 17 +
 aarch64/etc/hadoop/log4j.properties | 231 +
 aarch64/etc/hadoop/mapred-env.cmd | 20 +
 aarch64/etc/hadoop/mapred-env.sh | 27 +
 aarch64/etc/hadoop/mapred-queues.xml.template | 92 +
 aarch64/etc/hadoop/mapred-site.xml.template | 21 +
 aarch64/etc/hadoop/slaves | 1 +
 aarch64/etc/hadoop/ssl-client.xml.example | 80 +
 aarch64/etc/hadoop/ssl-server.xml.example | 77 +
 aarch64/etc/hadoop/yarn-env.cmd | 60 +
 aarch64/etc/hadoop/yarn-env.sh | 112 +
 aarch64/etc/hadoop/yarn-site.xml | 19 +
 aarch64/include/Pipes.hh | 260 +
 aarch64/include/SerialUtils.hh | 170 +
 aarch64/include/StringUtils.hh | 81 +
 aarch64/include/TemplateFactory.hh | 96 +
 aarch64/include/hdfs.h | 692 +
 aarch64/lib/native/libhadoop.a | Bin 0 -> 1036576 bytes
 aarch64/lib/native/libhadoop.so | 1 +
 aarch64/lib/native/libhadoop.so.1.0.0 | Bin 0 -> 552720 bytes
 aarch64/lib/native/libhadooppipes.a | Bin 0 -> 1757732 bytes
 aarch64/lib/native/libhadooputils.a | Bin 0 -> 527602 bytes
 aarch64/lib/native/libhdfs.a | Bin 0 -> 431394 bytes
 aarch64/lib/native/libhdfs.so | 1 +
 aarch64/lib/native/libhdfs.so.0.0.0 | Bin 0 -> 251622 bytes
 aarch64/libexec/hadoop-config.cmd | 292 +
 aarch64/libexec/hadoop-config.sh | 295 +
 aarch64/libexec/hdfs-config.cmd | 43 +
 aarch64/libexec/hdfs-config.sh | 36 +
 aarch64/libexec/httpfs-config.sh | 174 +
 aarch64/libexec/mapred-config.cmd | 43 +
 aarch64/libexec/mapred-config.sh | 52 +
 aarch64/libexec/yarn-config.cmd | 72 +
 aarch64/libexec/yarn-config.sh | 65 +
 aarch64/sbin/distribute-exclude.sh | 81 +
 aarch64/sbin/hadoop-daemon.sh | 202 +
 aarch64/sbin/hadoop-daemons.sh | 36 +
 aarch64/sbin/hdfs-config.cmd | 43 +
 aarch64/sbin/hdfs-config.sh | 36 +
 aarch64/sbin/httpfs.sh | 62 +
 aarch64/sbin/mr-jobhistory-daemon.sh | 146 +
 aarch64/sbin/refresh-namenodes.sh | 48 +
 aarch64/sbin/slaves.sh | 67 +
 aarch64/sbin/start-all.cmd | 52 +
 aarch64/sbin/start-all.sh | 38 +
 aarch64/sbin/start-balancer.sh | 27 +
 aarch64/sbin/start-dfs.cmd | 41 +
 aarch64/sbin/start-dfs.sh | 117 +
 aarch64/sbin/start-secure-dns.sh | 33 +
 aarch64/sbin/start-yarn.cmd | 47 +
 aarch64/sbin/start-yarn.sh | 35 +
 aarch64/sbin/stop-all.cmd | 52 +
 aarch64/sbin/stop-all.sh | 38 +
 aarch64/sbin/stop-balancer.sh | 28 +
 aarch64/sbin/stop-dfs.cmd | 41 +
 aarch64/sbin/stop-dfs.sh | 89 +
 aarch64/sbin/stop-secure-dns.sh | 33 +
 aarch64/sbin/stop-yarn.cmd | 47 +
 aarch64/sbin/stop-yarn.sh | 35 +
 aarch64/sbin/yarn-daemon.sh |
160 + aarch64/sbin/yarn-daemons.sh | 38 + aarch64/share/doc/hadoop/common/CHANGES.txt | 13861 +++++ aarch64/share/doc/hadoop/common/LICENSE.txt | 284 + aarch64/share/doc/hadoop/common/NOTICE.txt | 2 + aarch64/share/doc/hadoop/common/README.txt | 31 + aarch64/share/doc/hadoop/hdfs/CHANGES.txt | 6945 +++ aarch64/share/doc/hadoop/hdfs/LICENSE.txt | 271 + aarch64/share/doc/hadoop/hdfs/NOTICE.txt | 2 + aarch64/share/doc/hadoop/mapreduce/CHANGES.txt | 6904 +++ aarch64/share/doc/hadoop/mapreduce/LICENSE.txt | 244 + aarch64/share/doc/hadoop/mapreduce/NOTICE.txt | 2 + aarch64/share/doc/hadoop/yarn/CHANGES.txt | 1698 + aarch64/share/doc/hadoop/yarn/LICENSE.txt | 244 + aarch64/share/doc/hadoop/yarn/NOTICE.txt | 2 + .../hadoop/common/hadoop-common-2.2.0-tests.jar | Bin 0 -> 1352335 bytes .../share/hadoop/common/hadoop-common-2.2.0.jar | Bin 0 -> 2677324 bytes aarch64/share/hadoop/common/hadoop-nfs-2.2.0.jar | Bin 0 -> 139540 bytes .../hadoop/common/jdiff/hadoop-core_0.20.0.xml | 32308 +++++++++++ .../hadoop/common/jdiff/hadoop-core_0.21.0.xml | 25944 +++++++++ .../hadoop/common/jdiff/hadoop-core_0.22.0.xml | 28377 ++++++++++ .../share/hadoop/common/jdiff/hadoop_0.17.0.xml | 43272 +++++++++++++++ .../share/hadoop/common/jdiff/hadoop_0.18.1.xml | 44778 +++++++++++++++ .../share/hadoop/common/jdiff/hadoop_0.18.2.xml | 38788 +++++++++++++ .../share/hadoop/common/jdiff/hadoop_0.18.3.xml | 38826 +++++++++++++ .../share/hadoop/common/jdiff/hadoop_0.19.0.xml | 43972 +++++++++++++++ .../share/hadoop/common/jdiff/hadoop_0.19.1.xml | 44195 +++++++++++++++ .../share/hadoop/common/jdiff/hadoop_0.19.2.xml | 44204 +++++++++++++++ .../share/hadoop/common/jdiff/hadoop_0.20.0.xml | 52140 ++++++++++++++++++ .../share/hadoop/common/jdiff/hadoop_0.20.1.xml | 53832 ++++++++++++++++++ .../share/hadoop/common/jdiff/hadoop_0.20.2.xml | 53959 +++++++++++++++++++ aarch64/share/hadoop/common/lib/activation-1.1.jar | Bin 0 -> 62983 bytes aarch64/share/hadoop/common/lib/asm-3.2.jar | Bin 0 -> 43398 bytes aarch64/share/hadoop/common/lib/avro-1.7.4.jar | Bin 0 -> 303139 bytes .../hadoop/common/lib/commons-beanutils-1.7.0.jar | Bin 0 -> 188671 bytes .../common/lib/commons-beanutils-core-1.8.0.jar | Bin 0 -> 206035 bytes .../share/hadoop/common/lib/commons-cli-1.2.jar | Bin 0 -> 41123 bytes .../share/hadoop/common/lib/commons-codec-1.4.jar | Bin 0 -> 58160 bytes .../common/lib/commons-collections-3.2.1.jar | Bin 0 -> 575389 bytes .../hadoop/common/lib/commons-compress-1.4.1.jar | Bin 0 -> 241367 bytes .../common/lib/commons-configuration-1.6.jar | Bin 0 -> 298829 bytes .../hadoop/common/lib/commons-digester-1.8.jar | Bin 0 -> 143602 bytes aarch64/share/hadoop/common/lib/commons-el-1.0.jar | Bin 0 -> 112341 bytes .../hadoop/common/lib/commons-httpclient-3.1.jar | Bin 0 -> 305001 bytes aarch64/share/hadoop/common/lib/commons-io-2.1.jar | Bin 0 -> 163151 bytes .../share/hadoop/common/lib/commons-lang-2.5.jar | Bin 0 -> 279193 bytes .../hadoop/common/lib/commons-logging-1.1.1.jar | Bin 0 -> 60686 bytes .../share/hadoop/common/lib/commons-math-2.1.jar | Bin 0 -> 832410 bytes .../share/hadoop/common/lib/commons-net-3.1.jar | Bin 0 -> 273370 bytes aarch64/share/hadoop/common/lib/guava-11.0.2.jar | Bin 0 -> 1648200 bytes .../hadoop/common/lib/hadoop-annotations-2.2.0.jar | Bin 0 -> 16781 bytes .../share/hadoop/common/lib/hadoop-auth-2.2.0.jar | Bin 0 -> 49779 bytes .../hadoop/common/lib/jackson-core-asl-1.8.8.jar | Bin 0 -> 227500 bytes .../hadoop/common/lib/jackson-jaxrs-1.8.8.jar | Bin 0 -> 17884 bytes 
.../hadoop/common/lib/jackson-mapper-asl-1.8.8.jar | Bin 0 -> 668564 bytes .../share/hadoop/common/lib/jackson-xc-1.8.8.jar | Bin 0 -> 32353 bytes .../hadoop/common/lib/jasper-compiler-5.5.23.jar | Bin 0 -> 408133 bytes .../hadoop/common/lib/jasper-runtime-5.5.23.jar | Bin 0 -> 76844 bytes aarch64/share/hadoop/common/lib/jaxb-api-2.2.2.jar | Bin 0 -> 105134 bytes .../share/hadoop/common/lib/jaxb-impl-2.2.3-1.jar | Bin 0 -> 890168 bytes .../share/hadoop/common/lib/jersey-core-1.9.jar | Bin 0 -> 458739 bytes .../share/hadoop/common/lib/jersey-json-1.9.jar | Bin 0 -> 147952 bytes .../share/hadoop/common/lib/jersey-server-1.9.jar | Bin 0 -> 713089 bytes aarch64/share/hadoop/common/lib/jets3t-0.6.1.jar | Bin 0 -> 321806 bytes aarch64/share/hadoop/common/lib/jettison-1.1.jar | Bin 0 -> 67758 bytes aarch64/share/hadoop/common/lib/jetty-6.1.26.jar | Bin 0 -> 539912 bytes .../share/hadoop/common/lib/jetty-util-6.1.26.jar | Bin 0 -> 177131 bytes aarch64/share/hadoop/common/lib/jsch-0.1.42.jar | Bin 0 -> 185746 bytes aarch64/share/hadoop/common/lib/jsp-api-2.1.jar | Bin 0 -> 100636 bytes aarch64/share/hadoop/common/lib/jsr305-1.3.9.jar | Bin 0 -> 33015 bytes aarch64/share/hadoop/common/lib/junit-4.8.2.jar | Bin 0 -> 237344 bytes aarch64/share/hadoop/common/lib/log4j-1.2.17.jar | Bin 0 -> 489884 bytes .../share/hadoop/common/lib/mockito-all-1.8.5.jar | Bin 0 -> 1419869 bytes .../share/hadoop/common/lib/netty-3.6.2.Final.jar | Bin 0 -> 1199572 bytes aarch64/share/hadoop/common/lib/paranamer-2.3.jar | Bin 0 -> 29555 bytes .../hadoop/common/lib/protobuf-java-2.5.0.jar | Bin 0 -> 533455 bytes .../share/hadoop/common/lib/servlet-api-2.5.jar | Bin 0 -> 105112 bytes .../share/hadoop/common/lib/slf4j-api-1.7.5.jar | Bin 0 -> 26084 bytes .../hadoop/common/lib/slf4j-log4j12-1.7.5.jar | Bin 0 -> 8869 bytes .../hadoop/common/lib/snappy-java-1.0.4.1.jar | Bin 0 -> 995968 bytes aarch64/share/hadoop/common/lib/stax-api-1.0.1.jar | Bin 0 -> 26514 bytes aarch64/share/hadoop/common/lib/xmlenc-0.52.jar | Bin 0 -> 15010 bytes aarch64/share/hadoop/common/lib/xz-1.0.jar | Bin 0 -> 94672 bytes .../share/hadoop/common/lib/zookeeper-3.4.5.jar | Bin 0 -> 779974 bytes .../common/sources/hadoop-common-2.2.0-sources.jar | Bin 0 -> 1681090 bytes .../sources/hadoop-common-2.2.0-test-sources.jar | Bin 0 -> 746234 bytes .../share/hadoop/common/templates/core-site.xml | 20 + .../share/hadoop/hdfs/hadoop-hdfs-2.2.0-tests.jar | Bin 0 -> 1988555 bytes aarch64/share/hadoop/hdfs/hadoop-hdfs-2.2.0.jar | Bin 0 -> 5242564 bytes .../share/hadoop/hdfs/hadoop-hdfs-nfs-2.2.0.jar | Bin 0 -> 71670 bytes .../share/hadoop/hdfs/jdiff/hadoop-hdfs_0.20.0.xml | 10389 ++++ .../share/hadoop/hdfs/jdiff/hadoop-hdfs_0.21.0.xml | 16220 ++++++ .../share/hadoop/hdfs/jdiff/hadoop-hdfs_0.22.0.xml | 18589 +++++++ aarch64/share/hadoop/hdfs/lib/asm-3.2.jar | Bin 0 -> 43398 bytes aarch64/share/hadoop/hdfs/lib/commons-cli-1.2.jar | Bin 0 -> 41123 bytes .../share/hadoop/hdfs/lib/commons-codec-1.4.jar | Bin 0 -> 58160 bytes .../hadoop/hdfs/lib/commons-daemon-1.0.13.jar | Bin 0 -> 24239 bytes aarch64/share/hadoop/hdfs/lib/commons-el-1.0.jar | Bin 0 -> 112341 bytes aarch64/share/hadoop/hdfs/lib/commons-io-2.1.jar | Bin 0 -> 163151 bytes aarch64/share/hadoop/hdfs/lib/commons-lang-2.5.jar | Bin 0 -> 279193 bytes .../hadoop/hdfs/lib/commons-logging-1.1.1.jar | Bin 0 -> 60686 bytes aarch64/share/hadoop/hdfs/lib/guava-11.0.2.jar | Bin 0 -> 1648200 bytes .../hadoop/hdfs/lib/jackson-core-asl-1.8.8.jar | Bin 0 -> 227500 bytes .../hadoop/hdfs/lib/jackson-mapper-asl-1.8.8.jar | Bin 
0 -> 668564 bytes .../hadoop/hdfs/lib/jasper-runtime-5.5.23.jar | Bin 0 -> 76844 bytes aarch64/share/hadoop/hdfs/lib/jersey-core-1.9.jar | Bin 0 -> 458739 bytes .../share/hadoop/hdfs/lib/jersey-server-1.9.jar | Bin 0 -> 713089 bytes aarch64/share/hadoop/hdfs/lib/jetty-6.1.26.jar | Bin 0 -> 539912 bytes .../share/hadoop/hdfs/lib/jetty-util-6.1.26.jar | Bin 0 -> 177131 bytes aarch64/share/hadoop/hdfs/lib/jsp-api-2.1.jar | Bin 0 -> 100636 bytes aarch64/share/hadoop/hdfs/lib/jsr305-1.3.9.jar | Bin 0 -> 33015 bytes aarch64/share/hadoop/hdfs/lib/log4j-1.2.17.jar | Bin 0 -> 489884 bytes .../share/hadoop/hdfs/lib/netty-3.6.2.Final.jar | Bin 0 -> 1199572 bytes .../share/hadoop/hdfs/lib/protobuf-java-2.5.0.jar | Bin 0 -> 533455 bytes aarch64/share/hadoop/hdfs/lib/servlet-api-2.5.jar | Bin 0 -> 105112 bytes aarch64/share/hadoop/hdfs/lib/xmlenc-0.52.jar | Bin 0 -> 15010 bytes .../hdfs/sources/hadoop-hdfs-2.2.0-sources.jar | Bin 0 -> 1979061 bytes .../sources/hadoop-hdfs-2.2.0-test-sources.jar | Bin 0 -> 1300644 bytes aarch64/share/hadoop/hdfs/templates/hdfs-site.xml | 21 + .../hadoop/hdfs/webapps/datanode/WEB-INF/web.xml | 59 + .../share/hadoop/hdfs/webapps/datanode/robots.txt | 2 + .../share/hadoop/hdfs/webapps/hdfs/WEB-INF/web.xml | 109 + .../hadoop/hdfs/webapps/hdfs/decommission.xsl | 139 + .../hadoop/hdfs/webapps/hdfs/dfsclusterhealth.xsl | 170 + .../hdfs/webapps/hdfs/dfsclusterhealth_utils.xsl | 88 + aarch64/share/hadoop/hdfs/webapps/hdfs/index.html | 35 + .../hadoop/hdfs/webapps/journal/WEB-INF/web.xml | 39 + .../share/hadoop/hdfs/webapps/journal/index.html | 29 + .../hadoop/hdfs/webapps/secondary/WEB-INF/web.xml | 39 + .../share/hadoop/hdfs/webapps/secondary/index.html | 29 + .../share/hadoop/hdfs/webapps/static/hadoop.css | 190 + aarch64/share/hadoop/httpfs/tomcat/LICENSE | 707 + aarch64/share/hadoop/httpfs/tomcat/NOTICE | 16 + aarch64/share/hadoop/httpfs/tomcat/RELEASE-NOTES | 234 + aarch64/share/hadoop/httpfs/tomcat/RUNNING.txt | 454 + .../share/hadoop/httpfs/tomcat/bin/bootstrap.jar | Bin 0 -> 22706 bytes .../hadoop/httpfs/tomcat/bin/catalina-tasks.xml | 58 + .../share/hadoop/httpfs/tomcat/bin/catalina.bat | 286 + aarch64/share/hadoop/httpfs/tomcat/bin/catalina.sh | 506 + .../httpfs/tomcat/bin/commons-daemon-native.tar.gz | Bin 0 -> 202519 bytes .../hadoop/httpfs/tomcat/bin/commons-daemon.jar | Bin 0 -> 24242 bytes .../share/hadoop/httpfs/tomcat/bin/cpappend.bat | 35 + aarch64/share/hadoop/httpfs/tomcat/bin/digest.bat | 56 + aarch64/share/hadoop/httpfs/tomcat/bin/digest.sh | 48 + .../hadoop/httpfs/tomcat/bin/setclasspath.bat | 82 + .../share/hadoop/httpfs/tomcat/bin/setclasspath.sh | 116 + .../share/hadoop/httpfs/tomcat/bin/shutdown.bat | 59 + aarch64/share/hadoop/httpfs/tomcat/bin/shutdown.sh | 48 + aarch64/share/hadoop/httpfs/tomcat/bin/startup.bat | 59 + aarch64/share/hadoop/httpfs/tomcat/bin/startup.sh | 65 + .../share/hadoop/httpfs/tomcat/bin/tomcat-juli.jar | Bin 0 -> 32278 bytes .../hadoop/httpfs/tomcat/bin/tomcat-native.tar.gz | Bin 0 -> 258558 bytes .../hadoop/httpfs/tomcat/bin/tool-wrapper.bat | 85 + .../share/hadoop/httpfs/tomcat/bin/tool-wrapper.sh | 99 + aarch64/share/hadoop/httpfs/tomcat/bin/version.bat | 59 + aarch64/share/hadoop/httpfs/tomcat/bin/version.sh | 48 + .../hadoop/httpfs/tomcat/conf/catalina.policy | 222 + .../hadoop/httpfs/tomcat/conf/catalina.properties | 81 + .../share/hadoop/httpfs/tomcat/conf/context.xml | 35 + .../hadoop/httpfs/tomcat/conf/logging.properties | 67 + aarch64/share/hadoop/httpfs/tomcat/conf/server.xml | 150 + 
.../hadoop/httpfs/tomcat/conf/tomcat-users.xml | 36 + aarch64/share/hadoop/httpfs/tomcat/conf/web.xml | 1249 + .../hadoop/httpfs/tomcat/lib/annotations-api.jar | Bin 0 -> 15240 bytes .../hadoop/httpfs/tomcat/lib/catalina-ant.jar | Bin 0 -> 54565 bytes .../share/hadoop/httpfs/tomcat/lib/catalina-ha.jar | Bin 0 -> 132132 bytes .../hadoop/httpfs/tomcat/lib/catalina-tribes.jar | Bin 0 -> 237521 bytes .../share/hadoop/httpfs/tomcat/lib/catalina.jar | Bin 0 -> 1243752 bytes .../share/hadoop/httpfs/tomcat/lib/ecj-3.7.2.jar | Bin 0 -> 1749257 bytes aarch64/share/hadoop/httpfs/tomcat/lib/el-api.jar | Bin 0 -> 33314 bytes .../share/hadoop/httpfs/tomcat/lib/jasper-el.jar | Bin 0 -> 112554 bytes aarch64/share/hadoop/httpfs/tomcat/lib/jasper.jar | Bin 0 -> 527671 bytes aarch64/share/hadoop/httpfs/tomcat/lib/jsp-api.jar | Bin 0 -> 76691 bytes .../share/hadoop/httpfs/tomcat/lib/servlet-api.jar | Bin 0 -> 88499 bytes .../hadoop/httpfs/tomcat/lib/tomcat-coyote.jar | Bin 0 -> 771696 bytes .../share/hadoop/httpfs/tomcat/lib/tomcat-dbcp.jar | Bin 0 -> 253633 bytes .../hadoop/httpfs/tomcat/lib/tomcat-i18n-es.jar | Bin 0 -> 70018 bytes .../hadoop/httpfs/tomcat/lib/tomcat-i18n-fr.jar | Bin 0 -> 51901 bytes .../hadoop/httpfs/tomcat/lib/tomcat-i18n-ja.jar | Bin 0 -> 54509 bytes .../hadoop/httpfs/tomcat/temp/safeToDelete.tmp | 0 .../httpfs/tomcat/webapps/ROOT/WEB-INF/web.xml | 16 + .../hadoop/httpfs/tomcat/webapps/ROOT/index.html | 21 + .../WEB-INF/classes/default-log4j.properties | 20 + .../webhdfs/WEB-INF/classes/httpfs-default.xml | 237 + .../webhdfs/WEB-INF/classes/httpfs.properties | 21 + .../hadoop/fs/http/client/HttpFSFileSystem$1.class | Bin 0 -> 1136 bytes .../hadoop/fs/http/client/HttpFSFileSystem$2.class | Bin 0 -> 1399 bytes .../hadoop/fs/http/client/HttpFSFileSystem$3.class | Bin 0 -> 1761 bytes .../hadoop/fs/http/client/HttpFSFileSystem$4.class | Bin 0 -> 2037 bytes .../hadoop/fs/http/client/HttpFSFileSystem$5.class | Bin 0 -> 1863 bytes .../hadoop/fs/http/client/HttpFSFileSystem$6.class | Bin 0 -> 1009 bytes .../http/client/HttpFSFileSystem$FILE_TYPE.class | Bin 0 -> 2141 bytes .../HttpFSFileSystem$HttpFSDataInputStream.class | Bin 0 -> 1589 bytes .../HttpFSFileSystem$HttpFSDataOutputStream.class | Bin 0 -> 1406 bytes .../http/client/HttpFSFileSystem$Operation.class | Bin 0 -> 2723 bytes .../hadoop/fs/http/client/HttpFSFileSystem.class | Bin 0 -> 24410 bytes ...rosAuthenticator$DelegationTokenOperation.class | Bin 0 -> 2203 bytes .../http/client/HttpFSKerberosAuthenticator.class | Bin 0 -> 7214 bytes .../fs/http/client/HttpFSPseudoAuthenticator.class | Bin 0 -> 1339 bytes .../apache/hadoop/fs/http/client/HttpFSUtils.class | Bin 0 -> 5333 bytes .../http/server/CheckUploadContentTypeFilter.class | Bin 0 -> 3107 bytes .../fs/http/server/FSOperations$FSAppend.class | Bin 0 -> 2191 bytes .../fs/http/server/FSOperations$FSConcat.class | Bin 0 -> 1918 bytes .../server/FSOperations$FSContentSummary.class | Bin 0 -> 1827 bytes .../fs/http/server/FSOperations$FSCreate.class | Bin 0 -> 2908 bytes .../fs/http/server/FSOperations$FSDelete.class | Bin 0 -> 1988 bytes .../http/server/FSOperations$FSFileChecksum.class | Bin 0 -> 1807 bytes .../fs/http/server/FSOperations$FSFileStatus.class | Bin 0 -> 1791 bytes .../fs/http/server/FSOperations$FSHomeDir.class | Bin 0 -> 1848 bytes .../fs/http/server/FSOperations$FSListStatus.class | Bin 0 -> 2294 bytes .../fs/http/server/FSOperations$FSMkdirs.class | Bin 0 -> 2110 bytes .../fs/http/server/FSOperations$FSOpen.class | Bin 0 -> 1997 bytes 
.../fs/http/server/FSOperations$FSRename.class | Bin 0 -> 1963 bytes .../fs/http/server/FSOperations$FSSetOwner.class | Bin 0 -> 1809 bytes .../http/server/FSOperations$FSSetPermission.class | Bin 0 -> 1918 bytes .../server/FSOperations$FSSetReplication.class | Bin 0 -> 2037 bytes .../fs/http/server/FSOperations$FSSetTimes.class | Bin 0 -> 1746 bytes .../hadoop/fs/http/server/FSOperations.class | Bin 0 -> 6459 bytes .../http/server/HttpFSAuthenticationFilter.class | Bin 0 -> 3633 bytes .../fs/http/server/HttpFSExceptionProvider.class | Bin 0 -> 2899 bytes .../HttpFSKerberosAuthenticationHandler$1.class | Bin 0 -> 1290 bytes .../HttpFSKerberosAuthenticationHandler.class | Bin 0 -> 7845 bytes .../HttpFSParametersProvider$AccessTimeParam.class | Bin 0 -> 969 bytes .../HttpFSParametersProvider$BlockSizeParam.class | Bin 0 -> 965 bytes .../HttpFSParametersProvider$DataParam.class | Bin 0 -> 946 bytes ...HttpFSParametersProvider$DestinationParam.class | Bin 0 -> 901 bytes .../HttpFSParametersProvider$DoAsParam.class | Bin 0 -> 1529 bytes .../HttpFSParametersProvider$FilterParam.class | Bin 0 -> 881 bytes .../HttpFSParametersProvider$GroupParam.class | Bin 0 -> 1007 bytes .../server/HttpFSParametersProvider$LenParam.class | Bin 0 -> 944 bytes ...ttpFSParametersProvider$ModifiedTimeParam.class | Bin 0 -> 981 bytes .../HttpFSParametersProvider$OffsetParam.class | Bin 0 -> 942 bytes .../HttpFSParametersProvider$OperationParam.class | Bin 0 -> 1385 bytes .../HttpFSParametersProvider$OverwriteParam.class | Bin 0 -> 966 bytes .../HttpFSParametersProvider$OwnerParam.class | Bin 0 -> 1007 bytes .../HttpFSParametersProvider$PermissionParam.class | Bin 0 -> 1006 bytes .../HttpFSParametersProvider$RecursiveParam.class | Bin 0 -> 966 bytes ...HttpFSParametersProvider$ReplicationParam.class | Bin 0 -> 966 bytes .../HttpFSParametersProvider$SourcesParam.class | Bin 0 -> 885 bytes .../fs/http/server/HttpFSParametersProvider.class | Bin 0 -> 4087 bytes .../fs/http/server/HttpFSReleaseFilter.class | Bin 0 -> 1028 bytes .../hadoop/fs/http/server/HttpFSServer$1.class | Bin 0 -> 2000 bytes .../hadoop/fs/http/server/HttpFSServer.class | Bin 0 -> 19404 bytes .../hadoop/fs/http/server/HttpFSServerWebApp.class | Bin 0 -> 3194 bytes .../apache/hadoop/lib/lang/RunnableCallable.class | Bin 0 -> 2159 bytes .../apache/hadoop/lib/lang/XException$ERROR.class | Bin 0 -> 261 bytes .../org/apache/hadoop/lib/lang/XException.class | Bin 0 -> 2674 bytes .../org/apache/hadoop/lib/server/BaseService.class | Bin 0 -> 3470 bytes .../apache/hadoop/lib/server/Server$Status.class | Bin 0 -> 2043 bytes .../org/apache/hadoop/lib/server/Server.class | Bin 0 -> 18236 bytes .../hadoop/lib/server/ServerException$ERROR.class | Bin 0 -> 3109 bytes .../apache/hadoop/lib/server/ServerException.class | Bin 0 -> 1140 bytes .../org/apache/hadoop/lib/server/Service.class | Bin 0 -> 915 bytes .../hadoop/lib/server/ServiceException.class | Bin 0 -> 910 bytes .../lib/service/DelegationTokenIdentifier.class | Bin 0 -> 1206 bytes .../lib/service/DelegationTokenManager.class | Bin 0 -> 1576 bytes .../DelegationTokenManagerException$ERROR.class | Bin 0 -> 2000 bytes .../service/DelegationTokenManagerException.class | Bin 0 -> 1106 bytes .../FileSystemAccess$FileSystemExecutor.class | Bin 0 -> 500 bytes .../hadoop/lib/service/FileSystemAccess.class | Bin 0 -> 1250 bytes .../service/FileSystemAccessException$ERROR.class | Bin 0 -> 2612 bytes .../lib/service/FileSystemAccessException.class | Bin 0 -> 1070 bytes .../org/apache/hadoop/lib/service/Groups.class | Bin 
0 -> 575 bytes .../hadoop/lib/service/Instrumentation$Cron.class | Bin 0 -> 323 bytes .../lib/service/Instrumentation$Variable.class | Bin 0 -> 364 bytes .../hadoop/lib/service/Instrumentation.class | Bin 0 -> 1417 bytes .../org/apache/hadoop/lib/service/ProxyUser.class | Bin 0 -> 566 bytes .../org/apache/hadoop/lib/service/Scheduler.class | Bin 0 -> 644 bytes .../service/hadoop/FileSystemAccessService$1.class | Bin 0 -> 1400 bytes .../service/hadoop/FileSystemAccessService$2.class | Bin 0 -> 1389 bytes .../service/hadoop/FileSystemAccessService$3.class | Bin 0 -> 3036 bytes .../service/hadoop/FileSystemAccessService$4.class | Bin 0 -> 1437 bytes .../FileSystemAccessService$CachedFileSystem.class | Bin 0 -> 1557 bytes ...SystemAccessService$FileSystemCachePurger.class | Bin 0 -> 2630 bytes .../service/hadoop/FileSystemAccessService.class | Bin 0 -> 15405 bytes .../instrumentation/InstrumentationService$1.class | Bin 0 -> 1316 bytes .../instrumentation/InstrumentationService$2.class | Bin 0 -> 1315 bytes .../instrumentation/InstrumentationService$3.class | Bin 0 -> 1317 bytes .../InstrumentationService$Cron.class | Bin 0 -> 1567 bytes .../InstrumentationService$Sampler.class | Bin 0 -> 2942 bytes .../InstrumentationService$SamplersRunnable.class | Bin 0 -> 1734 bytes .../InstrumentationService$Timer.class | Bin 0 -> 3008 bytes .../InstrumentationService$VariableHolder.class | Bin 0 -> 2192 bytes .../instrumentation/InstrumentationService.class | Bin 0 -> 9958 bytes .../lib/service/scheduler/SchedulerService$1.class | Bin 0 -> 3280 bytes .../lib/service/scheduler/SchedulerService.class | Bin 0 -> 5109 bytes ...nagerService$DelegationTokenSecretManager.class | Bin 0 -> 1345 bytes .../security/DelegationTokenManagerService.class | Bin 0 -> 7282 bytes .../lib/service/security/GroupsService.class | Bin 0 -> 1812 bytes .../service/security/ProxyUserService$ERROR.class | Bin 0 -> 1983 bytes .../lib/service/security/ProxyUserService.class | Bin 0 -> 7297 bytes .../lib/servlet/FileSystemReleaseFilter.class | Bin 0 -> 2345 bytes .../apache/hadoop/lib/servlet/HostnameFilter.class | Bin 0 -> 2871 bytes .../org/apache/hadoop/lib/servlet/MDCFilter.class | Bin 0 -> 2242 bytes .../apache/hadoop/lib/servlet/ServerWebApp.class | Bin 0 -> 5411 bytes .../classes/org/apache/hadoop/lib/util/Check.class | Bin 0 -> 4014 bytes .../hadoop/lib/util/ConfigurationUtils.class | Bin 0 -> 5061 bytes .../org/apache/hadoop/lib/wsrs/BooleanParam.class | Bin 0 -> 1765 bytes .../org/apache/hadoop/lib/wsrs/ByteParam.class | Bin 0 -> 1354 bytes .../org/apache/hadoop/lib/wsrs/EnumParam.class | Bin 0 -> 2070 bytes .../apache/hadoop/lib/wsrs/ExceptionProvider.class | Bin 0 -> 3335 bytes .../apache/hadoop/lib/wsrs/InputStreamEntity.class | Bin 0 -> 1446 bytes .../org/apache/hadoop/lib/wsrs/IntegerParam.class | Bin 0 -> 1384 bytes .../apache/hadoop/lib/wsrs/JSONMapProvider.class | Bin 0 -> 3997 bytes .../org/apache/hadoop/lib/wsrs/JSONProvider.class | Bin 0 -> 4081 bytes .../org/apache/hadoop/lib/wsrs/LongParam.class | Bin 0 -> 1354 bytes .../classes/org/apache/hadoop/lib/wsrs/Param.class | Bin 0 -> 2130 bytes .../org/apache/hadoop/lib/wsrs/Parameters.class | Bin 0 -> 1412 bytes .../hadoop/lib/wsrs/ParametersProvider.class | Bin 0 -> 5689 bytes .../org/apache/hadoop/lib/wsrs/ShortParam.class | Bin 0 -> 1560 bytes .../org/apache/hadoop/lib/wsrs/StringParam.class | Bin 0 -> 2498 bytes .../apache/hadoop/lib/wsrs/UserProvider$1.class | Bin 0 -> 1015 bytes .../hadoop/lib/wsrs/UserProvider$UserParam.class | Bin 0 -> 1337 bytes 
.../org/apache/hadoop/lib/wsrs/UserProvider.class | Bin 0 -> 4065 bytes .../webapps/webhdfs/WEB-INF/lib/activation-1.1.jar | Bin 0 -> 62983 bytes .../tomcat/webapps/webhdfs/WEB-INF/lib/asm-3.2.jar | Bin 0 -> 43398 bytes .../webapps/webhdfs/WEB-INF/lib/avro-1.7.4.jar | Bin 0 -> 303139 bytes .../WEB-INF/lib/commons-beanutils-1.7.0.jar | Bin 0 -> 188671 bytes .../WEB-INF/lib/commons-beanutils-core-1.8.0.jar | Bin 0 -> 206035 bytes .../webhdfs/WEB-INF/lib/commons-cli-1.2.jar | Bin 0 -> 41123 bytes .../webhdfs/WEB-INF/lib/commons-codec-1.4.jar | Bin 0 -> 58160 bytes .../WEB-INF/lib/commons-collections-3.2.1.jar | Bin 0 -> 575389 bytes .../webhdfs/WEB-INF/lib/commons-compress-1.4.1.jar | Bin 0 -> 241367 bytes .../WEB-INF/lib/commons-configuration-1.6.jar | Bin 0 -> 298829 bytes .../webhdfs/WEB-INF/lib/commons-daemon-1.0.13.jar | Bin 0 -> 24239 bytes .../webhdfs/WEB-INF/lib/commons-digester-1.8.jar | Bin 0 -> 143602 bytes .../webapps/webhdfs/WEB-INF/lib/commons-io-2.1.jar | Bin 0 -> 163151 bytes .../webhdfs/WEB-INF/lib/commons-lang-2.5.jar | Bin 0 -> 279193 bytes .../webhdfs/WEB-INF/lib/commons-logging-1.1.1.jar | Bin 0 -> 60686 bytes .../webhdfs/WEB-INF/lib/commons-math-2.1.jar | Bin 0 -> 832410 bytes .../webhdfs/WEB-INF/lib/commons-net-3.1.jar | Bin 0 -> 273370 bytes .../webapps/webhdfs/WEB-INF/lib/guava-11.0.2.jar | Bin 0 -> 1648200 bytes .../WEB-INF/lib/hadoop-annotations-2.2.0.jar | Bin 0 -> 16781 bytes .../webhdfs/WEB-INF/lib/hadoop-auth-2.2.0.jar | Bin 0 -> 49779 bytes .../webhdfs/WEB-INF/lib/hadoop-common-2.2.0.jar | Bin 0 -> 2677324 bytes .../webhdfs/WEB-INF/lib/hadoop-hdfs-2.2.0.jar | Bin 0 -> 5242564 bytes .../webhdfs/WEB-INF/lib/jackson-core-asl-1.8.8.jar | Bin 0 -> 227500 bytes .../webhdfs/WEB-INF/lib/jackson-jaxrs-1.8.8.jar | Bin 0 -> 17884 bytes .../WEB-INF/lib/jackson-mapper-asl-1.8.8.jar | Bin 0 -> 668564 bytes .../webhdfs/WEB-INF/lib/jackson-xc-1.8.8.jar | Bin 0 -> 32353 bytes .../webapps/webhdfs/WEB-INF/lib/jaxb-api-2.2.2.jar | Bin 0 -> 105134 bytes .../webhdfs/WEB-INF/lib/jaxb-impl-2.2.3-1.jar | Bin 0 -> 890168 bytes .../webhdfs/WEB-INF/lib/jersey-core-1.9.jar | Bin 0 -> 458739 bytes .../webhdfs/WEB-INF/lib/jersey-json-1.9.jar | Bin 0 -> 147952 bytes .../webhdfs/WEB-INF/lib/jersey-server-1.9.jar | Bin 0 -> 713089 bytes .../webapps/webhdfs/WEB-INF/lib/jettison-1.1.jar | Bin 0 -> 67758 bytes .../webapps/webhdfs/WEB-INF/lib/jsch-0.1.42.jar | Bin 0 -> 185746 bytes .../webhdfs/WEB-INF/lib/json-simple-1.1.jar | Bin 0 -> 16046 bytes .../webapps/webhdfs/WEB-INF/lib/jsr305-1.3.9.jar | Bin 0 -> 33015 bytes .../webapps/webhdfs/WEB-INF/lib/log4j-1.2.17.jar | Bin 0 -> 489884 bytes .../webapps/webhdfs/WEB-INF/lib/paranamer-2.3.jar | Bin 0 -> 29555 bytes .../webhdfs/WEB-INF/lib/protobuf-java-2.5.0.jar | Bin 0 -> 533455 bytes .../webhdfs/WEB-INF/lib/slf4j-api-1.7.5.jar | Bin 0 -> 26084 bytes .../webhdfs/WEB-INF/lib/slf4j-log4j12-1.7.5.jar | Bin 0 -> 8869 bytes .../webhdfs/WEB-INF/lib/snappy-java-1.0.4.1.jar | Bin 0 -> 995968 bytes .../webapps/webhdfs/WEB-INF/lib/stax-api-1.0.1.jar | Bin 0 -> 26514 bytes .../webapps/webhdfs/WEB-INF/lib/xmlenc-0.52.jar | Bin 0 -> 15010 bytes .../tomcat/webapps/webhdfs/WEB-INF/lib/xz-1.0.jar | Bin 0 -> 94672 bytes .../webhdfs/WEB-INF/lib/zookeeper-3.4.5.jar | Bin 0 -> 779974 bytes .../httpfs/tomcat/webapps/webhdfs/WEB-INF/web.xml | 98 + .../hadoop-mapreduce-client-app-2.2.0.jar | Bin 0 -> 482132 bytes .../hadoop-mapreduce-client-common-2.2.0.jar | Bin 0 -> 656310 bytes .../hadoop-mapreduce-client-core-2.2.0.jar | Bin 0 -> 1455462 bytes 
.../mapreduce/hadoop-mapreduce-client-hs-2.2.0.jar | Bin 0 -> 117197 bytes .../hadoop-mapreduce-client-hs-plugins-2.2.0.jar | Bin 0 -> 4057 bytes ...doop-mapreduce-client-jobclient-2.2.0-tests.jar | Bin 0 -> 1434955 bytes .../hadoop-mapreduce-client-jobclient-2.2.0.jar | Bin 0 -> 35209 bytes .../hadoop-mapreduce-client-shuffle-2.2.0.jar | Bin 0 -> 21538 bytes .../mapreduce/hadoop-mapreduce-examples-2.2.0.jar | Bin 0 -> 270272 bytes .../hadoop/mapreduce/lib-examples/hsqldb-2.0.0.jar | Bin 0 -> 1256297 bytes .../share/hadoop/mapreduce/lib/aopalliance-1.0.jar | Bin 0 -> 4467 bytes aarch64/share/hadoop/mapreduce/lib/asm-3.2.jar | Bin 0 -> 43398 bytes aarch64/share/hadoop/mapreduce/lib/avro-1.7.4.jar | Bin 0 -> 303139 bytes .../mapreduce/lib/commons-compress-1.4.1.jar | Bin 0 -> 241367 bytes .../share/hadoop/mapreduce/lib/commons-io-2.1.jar | Bin 0 -> 163151 bytes aarch64/share/hadoop/mapreduce/lib/guice-3.0.jar | Bin 0 -> 710492 bytes .../hadoop/mapreduce/lib/guice-servlet-3.0.jar | Bin 0 -> 65012 bytes .../mapreduce/lib/hadoop-annotations-2.2.0.jar | Bin 0 -> 16781 bytes .../hadoop/mapreduce/lib/hamcrest-core-1.1.jar | Bin 0 -> 76643 bytes .../mapreduce/lib/jackson-core-asl-1.8.8.jar | Bin 0 -> 227500 bytes .../mapreduce/lib/jackson-mapper-asl-1.8.8.jar | Bin 0 -> 668564 bytes .../share/hadoop/mapreduce/lib/javax.inject-1.jar | Bin 0 -> 2497 bytes .../share/hadoop/mapreduce/lib/jersey-core-1.9.jar | Bin 0 -> 458739 bytes .../hadoop/mapreduce/lib/jersey-guice-1.9.jar | Bin 0 -> 14786 bytes .../hadoop/mapreduce/lib/jersey-server-1.9.jar | Bin 0 -> 713089 bytes aarch64/share/hadoop/mapreduce/lib/junit-4.10.jar | Bin 0 -> 253160 bytes .../share/hadoop/mapreduce/lib/log4j-1.2.17.jar | Bin 0 -> 489884 bytes .../hadoop/mapreduce/lib/netty-3.6.2.Final.jar | Bin 0 -> 1199572 bytes .../share/hadoop/mapreduce/lib/paranamer-2.3.jar | Bin 0 -> 29555 bytes .../hadoop/mapreduce/lib/protobuf-java-2.5.0.jar | Bin 0 -> 533455 bytes .../hadoop/mapreduce/lib/snappy-java-1.0.4.1.jar | Bin 0 -> 995968 bytes aarch64/share/hadoop/mapreduce/lib/xz-1.0.jar | Bin 0 -> 94672 bytes .../hadoop-mapreduce-client-app-2.2.0-sources.jar | Bin 0 -> 278860 bytes ...oop-mapreduce-client-app-2.2.0-test-sources.jar | Bin 0 -> 144052 bytes ...adoop-mapreduce-client-common-2.2.0-sources.jar | Bin 0 -> 244744 bytes ...-mapreduce-client-common-2.2.0-test-sources.jar | Bin 0 -> 24308 bytes .../hadoop-mapreduce-client-core-2.2.0-sources.jar | Bin 0 -> 1008323 bytes ...op-mapreduce-client-core-2.2.0-test-sources.jar | Bin 0 -> 67089 bytes .../hadoop-mapreduce-client-hs-2.2.0-sources.jar | Bin 0 -> 72681 bytes ...doop-mapreduce-client-hs-2.2.0-test-sources.jar | Bin 0 -> 63255 bytes ...p-mapreduce-client-hs-plugins-2.2.0-sources.jar | Bin 0 -> 2394 bytes ...reduce-client-hs-plugins-2.2.0-test-sources.jar | Bin 0 -> 2352 bytes ...op-mapreduce-client-jobclient-2.2.0-sources.jar | Bin 0 -> 21193 bytes ...preduce-client-jobclient-2.2.0-test-sources.jar | Bin 0 -> 694739 bytes ...doop-mapreduce-client-shuffle-2.2.0-sources.jar | Bin 0 -> 10600 bytes ...mapreduce-client-shuffle-2.2.0-test-sources.jar | Bin 0 -> 6453 bytes .../hadoop-mapreduce-examples-2.2.0-sources.jar | Bin 0 -> 695908 bytes ...adoop-mapreduce-examples-2.2.0-test-sources.jar | Bin 0 -> 12964 bytes .../hadoop/tools/lib/hadoop-archives-2.2.0.jar | Bin 0 -> 21487 bytes .../hadoop/tools/lib/hadoop-datajoin-2.2.0.jar | Bin 0 -> 14547 bytes .../share/hadoop/tools/lib/hadoop-distcp-2.2.0.jar | Bin 0 -> 80387 bytes .../share/hadoop/tools/lib/hadoop-extras-2.2.0.jar | Bin 0 -> 62040 
bytes .../hadoop/tools/lib/hadoop-gridmix-2.2.0.jar | Bin 0 -> 215354 bytes .../share/hadoop/tools/lib/hadoop-rumen-2.2.0.jar | Bin 0 -> 277586 bytes .../hadoop/tools/lib/hadoop-streaming-2.2.0.jar | Bin 0 -> 102790 bytes .../sources/hadoop-archives-2.2.0-sources.jar | Bin 0 -> 9636 bytes .../sources/hadoop-archives-2.2.0-test-sources.jar | Bin 0 -> 3185 bytes .../sources/hadoop-datajoin-2.2.0-sources.jar | Bin 0 -> 12200 bytes .../sources/hadoop-datajoin-2.2.0-test-sources.jar | Bin 0 -> 7197 bytes .../tools/sources/hadoop-distcp-2.2.0-sources.jar | Bin 0 -> 59176 bytes .../sources/hadoop-distcp-2.2.0-test-sources.jar | Bin 0 -> 38610 bytes .../tools/sources/hadoop-extras-2.2.0-sources.jar | Bin 0 -> 30647 bytes .../sources/hadoop-extras-2.2.0-test-sources.jar | Bin 0 -> 13893 bytes .../tools/sources/hadoop-gridmix-2.2.0-sources.jar | Bin 0 -> 121404 bytes .../sources/hadoop-gridmix-2.2.0-test-sources.jar | Bin 0 -> 71676 bytes .../tools/sources/hadoop-rumen-2.2.0-sources.jar | Bin 0 -> 170000 bytes .../sources/hadoop-rumen-2.2.0-test-sources.jar | Bin 0 -> 9314 bytes .../sources/hadoop-streaming-2.2.0-sources.jar | Bin 0 -> 71829 bytes .../hadoop-streaming-2.2.0-test-sources.jar | Bin 0 -> 76355 bytes .../share/hadoop/yarn/hadoop-yarn-api-2.2.0.jar | Bin 0 -> 1158740 bytes ...op-yarn-applications-distributedshell-2.2.0.jar | Bin 0 -> 32509 bytes ...rn-applications-unmanaged-am-launcher-2.2.0.jar | Bin 0 -> 13299 bytes .../share/hadoop/yarn/hadoop-yarn-client-2.2.0.jar | Bin 0 -> 94754 bytes .../share/hadoop/yarn/hadoop-yarn-common-2.2.0.jar | Bin 0 -> 1301644 bytes .../yarn/hadoop-yarn-server-common-2.2.0.jar | Bin 0 -> 175522 bytes .../yarn/hadoop-yarn-server-nodemanager-2.2.0.jar | Bin 0 -> 467789 bytes .../hadoop-yarn-server-resourcemanager-2.2.0.jar | Bin 0 -> 615701 bytes .../hadoop/yarn/hadoop-yarn-server-tests-2.2.0.jar | Bin 0 -> 2137 bytes .../yarn/hadoop-yarn-server-web-proxy-2.2.0.jar | Bin 0 -> 25701 bytes .../share/hadoop/yarn/hadoop-yarn-site-2.2.0.jar | Bin 0 -> 1930 bytes .../hadoop/yarn/lib-examples/hsqldb-2.0.0.jar | Bin 0 -> 1256297 bytes aarch64/share/hadoop/yarn/lib/aopalliance-1.0.jar | Bin 0 -> 4467 bytes aarch64/share/hadoop/yarn/lib/asm-3.2.jar | Bin 0 -> 43398 bytes aarch64/share/hadoop/yarn/lib/avro-1.7.4.jar | Bin 0 -> 303139 bytes .../hadoop/yarn/lib/commons-compress-1.4.1.jar | Bin 0 -> 241367 bytes aarch64/share/hadoop/yarn/lib/commons-io-2.1.jar | Bin 0 -> 163151 bytes aarch64/share/hadoop/yarn/lib/guice-3.0.jar | Bin 0 -> 710492 bytes .../share/hadoop/yarn/lib/guice-servlet-3.0.jar | Bin 0 -> 65012 bytes .../hadoop/yarn/lib/hadoop-annotations-2.2.0.jar | Bin 0 -> 16781 bytes .../share/hadoop/yarn/lib/hamcrest-core-1.1.jar | Bin 0 -> 76643 bytes .../hadoop/yarn/lib/jackson-core-asl-1.8.8.jar | Bin 0 -> 227500 bytes .../hadoop/yarn/lib/jackson-mapper-asl-1.8.8.jar | Bin 0 -> 668564 bytes aarch64/share/hadoop/yarn/lib/javax.inject-1.jar | Bin 0 -> 2497 bytes aarch64/share/hadoop/yarn/lib/jersey-core-1.9.jar | Bin 0 -> 458739 bytes aarch64/share/hadoop/yarn/lib/jersey-guice-1.9.jar | Bin 0 -> 14786 bytes .../share/hadoop/yarn/lib/jersey-server-1.9.jar | Bin 0 -> 713089 bytes aarch64/share/hadoop/yarn/lib/junit-4.10.jar | Bin 0 -> 253160 bytes aarch64/share/hadoop/yarn/lib/log4j-1.2.17.jar | Bin 0 -> 489884 bytes .../share/hadoop/yarn/lib/netty-3.6.2.Final.jar | Bin 0 -> 1199572 bytes aarch64/share/hadoop/yarn/lib/paranamer-2.3.jar | Bin 0 -> 29555 bytes .../share/hadoop/yarn/lib/protobuf-java-2.5.0.jar | Bin 0 -> 533455 bytes 
.../share/hadoop/yarn/lib/snappy-java-1.0.4.1.jar | Bin 0 -> 995968 bytes aarch64/share/hadoop/yarn/lib/xz-1.0.jar | Bin 0 -> 94672 bytes .../yarn/sources/hadoop-yarn-api-2.2.0-sources.jar | Bin 0 -> 360318 bytes ...applications-distributedshell-2.2.0-sources.jar | Bin 0 -> 19273 bytes ...cations-distributedshell-2.2.0-test-sources.jar | Bin 0 -> 6355 bytes ...cations-unmanaged-am-launcher-2.2.0-sources.jar | Bin 0 -> 6265 bytes ...ns-unmanaged-am-launcher-2.2.0-test-sources.jar | Bin 0 -> 4941 bytes .../sources/hadoop-yarn-client-2.2.0-sources.jar | Bin 0 -> 59384 bytes .../hadoop-yarn-client-2.2.0-test-sources.jar | Bin 0 -> 35662 bytes .../sources/hadoop-yarn-common-2.2.0-sources.jar | Bin 0 -> 634756 bytes .../hadoop-yarn-common-2.2.0-test-sources.jar | Bin 0 -> 79714 bytes .../hadoop-yarn-server-common-2.2.0-sources.jar | Bin 0 -> 76814 bytes ...adoop-yarn-server-common-2.2.0-test-sources.jar | Bin 0 -> 7884 bytes ...adoop-yarn-server-nodemanager-2.2.0-sources.jar | Bin 0 -> 262437 bytes ...-yarn-server-nodemanager-2.2.0-test-sources.jar | Bin 0 -> 158721 bytes ...p-yarn-server-resourcemanager-2.2.0-sources.jar | Bin 0 -> 387489 bytes ...n-server-resourcemanager-2.2.0-test-sources.jar | Bin 0 -> 246635 bytes ...hadoop-yarn-server-tests-2.2.0-test-sources.jar | Bin 0 -> 18425 bytes .../hadoop-yarn-server-web-proxy-2.2.0-sources.jar | Bin 0 -> 17741 bytes ...op-yarn-server-web-proxy-2.2.0-test-sources.jar | Bin 0 -> 5907 bytes .../test/hadoop-yarn-server-tests-2.2.0-tests.jar | Bin 0 -> 35375 bytes 565 files changed, 633839 insertions(+) create mode 100755 aarch64/bin/container-executor create mode 100755 aarch64/bin/hadoop create mode 100755 aarch64/bin/hadoop.cmd create mode 100755 aarch64/bin/hdfs create mode 100755 aarch64/bin/hdfs.cmd create mode 100755 aarch64/bin/mapred create mode 100755 aarch64/bin/mapred.cmd create mode 100755 aarch64/bin/rcc create mode 100755 aarch64/bin/test-container-executor create mode 100755 aarch64/bin/yarn create mode 100755 aarch64/bin/yarn.cmd create mode 100644 aarch64/etc/hadoop/capacity-scheduler.xml create mode 100644 aarch64/etc/hadoop/configuration.xsl create mode 100644 aarch64/etc/hadoop/container-executor.cfg create mode 100644 aarch64/etc/hadoop/core-site.xml create mode 100644 aarch64/etc/hadoop/hadoop-env.cmd create mode 100644 aarch64/etc/hadoop/hadoop-env.sh create mode 100644 aarch64/etc/hadoop/hadoop-metrics.properties create mode 100644 aarch64/etc/hadoop/hadoop-metrics2.properties create mode 100644 aarch64/etc/hadoop/hadoop-policy.xml create mode 100644 aarch64/etc/hadoop/hdfs-site.xml create mode 100644 aarch64/etc/hadoop/httpfs-env.sh create mode 100644 aarch64/etc/hadoop/httpfs-log4j.properties create mode 100644 aarch64/etc/hadoop/httpfs-signature.secret create mode 100644 aarch64/etc/hadoop/httpfs-site.xml create mode 100644 aarch64/etc/hadoop/log4j.properties create mode 100644 aarch64/etc/hadoop/mapred-env.cmd create mode 100644 aarch64/etc/hadoop/mapred-env.sh create mode 100644 aarch64/etc/hadoop/mapred-queues.xml.template create mode 100644 aarch64/etc/hadoop/mapred-site.xml.template create mode 100644 aarch64/etc/hadoop/slaves create mode 100644 aarch64/etc/hadoop/ssl-client.xml.example create mode 100644 aarch64/etc/hadoop/ssl-server.xml.example create mode 100644 aarch64/etc/hadoop/yarn-env.cmd create mode 100644 aarch64/etc/hadoop/yarn-env.sh create mode 100644 aarch64/etc/hadoop/yarn-site.xml create mode 100644 aarch64/include/Pipes.hh create mode 100644 aarch64/include/SerialUtils.hh create mode 100644 
aarch64/include/StringUtils.hh create mode 100644 aarch64/include/TemplateFactory.hh create mode 100644 aarch64/include/hdfs.h create mode 100644 aarch64/lib/native/libhadoop.a create mode 120000 aarch64/lib/native/libhadoop.so create mode 100755 aarch64/lib/native/libhadoop.so.1.0.0 create mode 100644 aarch64/lib/native/libhadooppipes.a create mode 100644 aarch64/lib/native/libhadooputils.a create mode 100644 aarch64/lib/native/libhdfs.a create mode 120000 aarch64/lib/native/libhdfs.so create mode 100755 aarch64/lib/native/libhdfs.so.0.0.0 create mode 100755 aarch64/libexec/hadoop-config.cmd create mode 100755 aarch64/libexec/hadoop-config.sh create mode 100755 aarch64/libexec/hdfs-config.cmd create mode 100755 aarch64/libexec/hdfs-config.sh create mode 100755 aarch64/libexec/httpfs-config.sh create mode 100755 aarch64/libexec/mapred-config.cmd create mode 100755 aarch64/libexec/mapred-config.sh create mode 100755 aarch64/libexec/yarn-config.cmd create mode 100755 aarch64/libexec/yarn-config.sh create mode 100755 aarch64/sbin/distribute-exclude.sh create mode 100755 aarch64/sbin/hadoop-daemon.sh create mode 100755 aarch64/sbin/hadoop-daemons.sh create mode 100755 aarch64/sbin/hdfs-config.cmd create mode 100755 aarch64/sbin/hdfs-config.sh create mode 100755 aarch64/sbin/httpfs.sh create mode 100755 aarch64/sbin/mr-jobhistory-daemon.sh create mode 100755 aarch64/sbin/refresh-namenodes.sh create mode 100755 aarch64/sbin/slaves.sh create mode 100755 aarch64/sbin/start-all.cmd create mode 100755 aarch64/sbin/start-all.sh create mode 100755 aarch64/sbin/start-balancer.sh create mode 100755 aarch64/sbin/start-dfs.cmd create mode 100755 aarch64/sbin/start-dfs.sh create mode 100755 aarch64/sbin/start-secure-dns.sh create mode 100755 aarch64/sbin/start-yarn.cmd create mode 100755 aarch64/sbin/start-yarn.sh create mode 100755 aarch64/sbin/stop-all.cmd create mode 100755 aarch64/sbin/stop-all.sh create mode 100755 aarch64/sbin/stop-balancer.sh create mode 100755 aarch64/sbin/stop-dfs.cmd create mode 100755 aarch64/sbin/stop-dfs.sh create mode 100755 aarch64/sbin/stop-secure-dns.sh create mode 100755 aarch64/sbin/stop-yarn.cmd create mode 100755 aarch64/sbin/stop-yarn.sh create mode 100755 aarch64/sbin/yarn-daemon.sh create mode 100755 aarch64/sbin/yarn-daemons.sh create mode 100644 aarch64/share/doc/hadoop/common/CHANGES.txt create mode 100644 aarch64/share/doc/hadoop/common/LICENSE.txt create mode 100644 aarch64/share/doc/hadoop/common/NOTICE.txt create mode 100644 aarch64/share/doc/hadoop/common/README.txt create mode 100644 aarch64/share/doc/hadoop/hdfs/CHANGES.txt create mode 100644 aarch64/share/doc/hadoop/hdfs/LICENSE.txt create mode 100644 aarch64/share/doc/hadoop/hdfs/NOTICE.txt create mode 100644 aarch64/share/doc/hadoop/mapreduce/CHANGES.txt create mode 100644 aarch64/share/doc/hadoop/mapreduce/LICENSE.txt create mode 100644 aarch64/share/doc/hadoop/mapreduce/NOTICE.txt create mode 100644 aarch64/share/doc/hadoop/yarn/CHANGES.txt create mode 100644 aarch64/share/doc/hadoop/yarn/LICENSE.txt create mode 100644 aarch64/share/doc/hadoop/yarn/NOTICE.txt create mode 100644 aarch64/share/hadoop/common/hadoop-common-2.2.0-tests.jar create mode 100644 aarch64/share/hadoop/common/hadoop-common-2.2.0.jar create mode 100644 aarch64/share/hadoop/common/hadoop-nfs-2.2.0.jar create mode 100644 aarch64/share/hadoop/common/jdiff/hadoop-core_0.20.0.xml create mode 100644 aarch64/share/hadoop/common/jdiff/hadoop-core_0.21.0.xml create mode 100644 aarch64/share/hadoop/common/jdiff/hadoop-core_0.22.0.xml create 
mode 100644 aarch64/share/hadoop/common/jdiff/hadoop_0.17.0.xml create mode 100644 aarch64/share/hadoop/common/jdiff/hadoop_0.18.1.xml create mode 100644 aarch64/share/hadoop/common/jdiff/hadoop_0.18.2.xml create mode 100644 aarch64/share/hadoop/common/jdiff/hadoop_0.18.3.xml create mode 100644 aarch64/share/hadoop/common/jdiff/hadoop_0.19.0.xml create mode 100644 aarch64/share/hadoop/common/jdiff/hadoop_0.19.1.xml create mode 100644 aarch64/share/hadoop/common/jdiff/hadoop_0.19.2.xml create mode 100644 aarch64/share/hadoop/common/jdiff/hadoop_0.20.0.xml create mode 100644 aarch64/share/hadoop/common/jdiff/hadoop_0.20.1.xml create mode 100644 aarch64/share/hadoop/common/jdiff/hadoop_0.20.2.xml create mode 100644 aarch64/share/hadoop/common/lib/activation-1.1.jar create mode 100644 aarch64/share/hadoop/common/lib/asm-3.2.jar create mode 100644 aarch64/share/hadoop/common/lib/avro-1.7.4.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-beanutils-1.7.0.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-beanutils-core-1.8.0.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-cli-1.2.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-codec-1.4.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-collections-3.2.1.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-compress-1.4.1.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-configuration-1.6.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-digester-1.8.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-el-1.0.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-httpclient-3.1.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-io-2.1.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-lang-2.5.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-logging-1.1.1.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-math-2.1.jar create mode 100644 aarch64/share/hadoop/common/lib/commons-net-3.1.jar create mode 100644 aarch64/share/hadoop/common/lib/guava-11.0.2.jar create mode 100644 aarch64/share/hadoop/common/lib/hadoop-annotations-2.2.0.jar create mode 100644 aarch64/share/hadoop/common/lib/hadoop-auth-2.2.0.jar create mode 100644 aarch64/share/hadoop/common/lib/jackson-core-asl-1.8.8.jar create mode 100644 aarch64/share/hadoop/common/lib/jackson-jaxrs-1.8.8.jar create mode 100644 aarch64/share/hadoop/common/lib/jackson-mapper-asl-1.8.8.jar create mode 100644 aarch64/share/hadoop/common/lib/jackson-xc-1.8.8.jar create mode 100644 aarch64/share/hadoop/common/lib/jasper-compiler-5.5.23.jar create mode 100644 aarch64/share/hadoop/common/lib/jasper-runtime-5.5.23.jar create mode 100644 aarch64/share/hadoop/common/lib/jaxb-api-2.2.2.jar create mode 100644 aarch64/share/hadoop/common/lib/jaxb-impl-2.2.3-1.jar create mode 100644 aarch64/share/hadoop/common/lib/jersey-core-1.9.jar create mode 100644 aarch64/share/hadoop/common/lib/jersey-json-1.9.jar create mode 100644 aarch64/share/hadoop/common/lib/jersey-server-1.9.jar create mode 100644 aarch64/share/hadoop/common/lib/jets3t-0.6.1.jar create mode 100644 aarch64/share/hadoop/common/lib/jettison-1.1.jar create mode 100644 aarch64/share/hadoop/common/lib/jetty-6.1.26.jar create mode 100644 aarch64/share/hadoop/common/lib/jetty-util-6.1.26.jar create mode 100644 aarch64/share/hadoop/common/lib/jsch-0.1.42.jar create mode 100644 aarch64/share/hadoop/common/lib/jsp-api-2.1.jar create mode 100644 aarch64/share/hadoop/common/lib/jsr305-1.3.9.jar 
create mode 100644 aarch64/share/hadoop/common/lib/junit-4.8.2.jar create mode 100644 aarch64/share/hadoop/common/lib/log4j-1.2.17.jar create mode 100644 aarch64/share/hadoop/common/lib/mockito-all-1.8.5.jar create mode 100644 aarch64/share/hadoop/common/lib/netty-3.6.2.Final.jar create mode 100644 aarch64/share/hadoop/common/lib/paranamer-2.3.jar create mode 100644 aarch64/share/hadoop/common/lib/protobuf-java-2.5.0.jar create mode 100644 aarch64/share/hadoop/common/lib/servlet-api-2.5.jar create mode 100644 aarch64/share/hadoop/common/lib/slf4j-api-1.7.5.jar create mode 100644 aarch64/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar create mode 100644 aarch64/share/hadoop/common/lib/snappy-java-1.0.4.1.jar create mode 100644 aarch64/share/hadoop/common/lib/stax-api-1.0.1.jar create mode 100644 aarch64/share/hadoop/common/lib/xmlenc-0.52.jar create mode 100644 aarch64/share/hadoop/common/lib/xz-1.0.jar create mode 100644 aarch64/share/hadoop/common/lib/zookeeper-3.4.5.jar create mode 100644 aarch64/share/hadoop/common/sources/hadoop-common-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/common/sources/hadoop-common-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/common/templates/core-site.xml create mode 100644 aarch64/share/hadoop/hdfs/hadoop-hdfs-2.2.0-tests.jar create mode 100644 aarch64/share/hadoop/hdfs/hadoop-hdfs-2.2.0.jar create mode 100644 aarch64/share/hadoop/hdfs/hadoop-hdfs-nfs-2.2.0.jar create mode 100644 aarch64/share/hadoop/hdfs/jdiff/hadoop-hdfs_0.20.0.xml create mode 100644 aarch64/share/hadoop/hdfs/jdiff/hadoop-hdfs_0.21.0.xml create mode 100644 aarch64/share/hadoop/hdfs/jdiff/hadoop-hdfs_0.22.0.xml create mode 100644 aarch64/share/hadoop/hdfs/lib/asm-3.2.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/commons-cli-1.2.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/commons-codec-1.4.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/commons-daemon-1.0.13.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/commons-el-1.0.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/commons-io-2.1.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/commons-lang-2.5.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/commons-logging-1.1.1.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/guava-11.0.2.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/jackson-core-asl-1.8.8.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/jackson-mapper-asl-1.8.8.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/jasper-runtime-5.5.23.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/jersey-core-1.9.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/jersey-server-1.9.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/jetty-6.1.26.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/jetty-util-6.1.26.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/jsp-api-2.1.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/jsr305-1.3.9.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/log4j-1.2.17.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/netty-3.6.2.Final.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/protobuf-java-2.5.0.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/servlet-api-2.5.jar create mode 100644 aarch64/share/hadoop/hdfs/lib/xmlenc-0.52.jar create mode 100644 aarch64/share/hadoop/hdfs/sources/hadoop-hdfs-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/hdfs/sources/hadoop-hdfs-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/hdfs/templates/hdfs-site.xml create mode 100644 
aarch64/share/hadoop/hdfs/webapps/datanode/WEB-INF/web.xml create mode 100644 aarch64/share/hadoop/hdfs/webapps/datanode/robots.txt create mode 100644 aarch64/share/hadoop/hdfs/webapps/hdfs/WEB-INF/web.xml create mode 100644 aarch64/share/hadoop/hdfs/webapps/hdfs/decommission.xsl create mode 100644 aarch64/share/hadoop/hdfs/webapps/hdfs/dfsclusterhealth.xsl create mode 100644 aarch64/share/hadoop/hdfs/webapps/hdfs/dfsclusterhealth_utils.xsl create mode 100644 aarch64/share/hadoop/hdfs/webapps/hdfs/index.html create mode 100644 aarch64/share/hadoop/hdfs/webapps/journal/WEB-INF/web.xml create mode 100644 aarch64/share/hadoop/hdfs/webapps/journal/index.html create mode 100644 aarch64/share/hadoop/hdfs/webapps/secondary/WEB-INF/web.xml create mode 100644 aarch64/share/hadoop/hdfs/webapps/secondary/index.html create mode 100644 aarch64/share/hadoop/hdfs/webapps/static/hadoop.css create mode 100644 aarch64/share/hadoop/httpfs/tomcat/LICENSE create mode 100644 aarch64/share/hadoop/httpfs/tomcat/NOTICE create mode 100644 aarch64/share/hadoop/httpfs/tomcat/RELEASE-NOTES create mode 100644 aarch64/share/hadoop/httpfs/tomcat/RUNNING.txt create mode 100644 aarch64/share/hadoop/httpfs/tomcat/bin/bootstrap.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/bin/catalina-tasks.xml create mode 100644 aarch64/share/hadoop/httpfs/tomcat/bin/catalina.bat create mode 100755 aarch64/share/hadoop/httpfs/tomcat/bin/catalina.sh create mode 100644 aarch64/share/hadoop/httpfs/tomcat/bin/commons-daemon-native.tar.gz create mode 100644 aarch64/share/hadoop/httpfs/tomcat/bin/commons-daemon.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/bin/cpappend.bat create mode 100644 aarch64/share/hadoop/httpfs/tomcat/bin/digest.bat create mode 100755 aarch64/share/hadoop/httpfs/tomcat/bin/digest.sh create mode 100644 aarch64/share/hadoop/httpfs/tomcat/bin/setclasspath.bat create mode 100755 aarch64/share/hadoop/httpfs/tomcat/bin/setclasspath.sh create mode 100644 aarch64/share/hadoop/httpfs/tomcat/bin/shutdown.bat create mode 100755 aarch64/share/hadoop/httpfs/tomcat/bin/shutdown.sh create mode 100644 aarch64/share/hadoop/httpfs/tomcat/bin/startup.bat create mode 100755 aarch64/share/hadoop/httpfs/tomcat/bin/startup.sh create mode 100644 aarch64/share/hadoop/httpfs/tomcat/bin/tomcat-juli.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/bin/tomcat-native.tar.gz create mode 100644 aarch64/share/hadoop/httpfs/tomcat/bin/tool-wrapper.bat create mode 100755 aarch64/share/hadoop/httpfs/tomcat/bin/tool-wrapper.sh create mode 100644 aarch64/share/hadoop/httpfs/tomcat/bin/version.bat create mode 100755 aarch64/share/hadoop/httpfs/tomcat/bin/version.sh create mode 100644 aarch64/share/hadoop/httpfs/tomcat/conf/catalina.policy create mode 100644 aarch64/share/hadoop/httpfs/tomcat/conf/catalina.properties create mode 100644 aarch64/share/hadoop/httpfs/tomcat/conf/context.xml create mode 100644 aarch64/share/hadoop/httpfs/tomcat/conf/logging.properties create mode 100644 aarch64/share/hadoop/httpfs/tomcat/conf/server.xml create mode 100644 aarch64/share/hadoop/httpfs/tomcat/conf/tomcat-users.xml create mode 100644 aarch64/share/hadoop/httpfs/tomcat/conf/web.xml create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/annotations-api.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/catalina-ant.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/catalina-ha.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/catalina-tribes.jar create mode 100644 
aarch64/share/hadoop/httpfs/tomcat/lib/catalina.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/ecj-3.7.2.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/el-api.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/jasper-el.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/jasper.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/jsp-api.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/servlet-api.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-coyote.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-dbcp.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-i18n-es.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-i18n-fr.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-i18n-ja.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/temp/safeToDelete.tmp create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/ROOT/WEB-INF/web.xml create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/ROOT/index.html create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/default-log4j.properties create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/httpfs-default.xml create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/httpfs.properties create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$1.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$2.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$3.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$4.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$5.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$6.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$FILE_TYPE.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$HttpFSDataInputStream.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$HttpFSDataOutputStream.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$Operation.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSKerberosAuthenticator$DelegationTokenOperation.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSKerberosAuthenticator.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSPseudoAuthenticator.class create mode 
100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSUtils.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/CheckUploadContentTypeFilter.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSAppend.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSConcat.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSContentSummary.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSCreate.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSDelete.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSFileChecksum.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSFileStatus.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSHomeDir.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSListStatus.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSMkdirs.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSOpen.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSRename.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetOwner.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetPermission.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetReplication.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetTimes.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSAuthenticationFilter.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSExceptionProvider.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSKerberosAuthenticationHandler$1.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSKerberosAuthenticationHandler.class create mode 100644 
aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$AccessTimeParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$BlockSizeParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$DataParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$DestinationParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$DoAsParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$FilterParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$GroupParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$LenParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$ModifiedTimeParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OffsetParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OperationParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OverwriteParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OwnerParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$PermissionParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$RecursiveParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$ReplicationParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$SourcesParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSReleaseFilter.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSServer$1.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSServer.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSServerWebApp.class create mode 100644 
aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/lang/RunnableCallable.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/lang/XException$ERROR.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/lang/XException.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/BaseService.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/Server$Status.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/Server.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/ServerException$ERROR.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/ServerException.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/Service.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/ServiceException.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenIdentifier.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenManager.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenManagerException$ERROR.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenManagerException.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccess$FileSystemExecutor.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccess.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccessException$ERROR.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccessException.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Groups.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Instrumentation$Cron.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Instrumentation$Variable.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Instrumentation.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/ProxyUser.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Scheduler.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$1.class create mode 100644 
aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$2.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$3.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$4.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$CachedFileSystem.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$FileSystemCachePurger.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$1.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$2.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$3.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$Cron.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$Sampler.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$SamplersRunnable.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$Timer.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$VariableHolder.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/scheduler/SchedulerService$1.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/scheduler/SchedulerService.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/DelegationTokenManagerService$DelegationTokenSecretManager.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/DelegationTokenManagerService.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/GroupsService.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/ProxyUserService$ERROR.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/ProxyUserService.class create 
mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/FileSystemReleaseFilter.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/HostnameFilter.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/MDCFilter.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/ServerWebApp.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/util/Check.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/util/ConfigurationUtils.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/BooleanParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ByteParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/EnumParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ExceptionProvider.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/InputStreamEntity.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/IntegerParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/JSONMapProvider.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/JSONProvider.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/LongParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/Param.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/Parameters.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ParametersProvider.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ShortParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/StringParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/UserProvider$1.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/UserProvider$UserParam.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/UserProvider.class create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/activation-1.1.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/asm-3.2.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/avro-1.7.4.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-beanutils-1.7.0.jar create mode 100644 
aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-beanutils-core-1.8.0.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-cli-1.2.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-codec-1.4.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-collections-3.2.1.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-compress-1.4.1.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-configuration-1.6.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-daemon-1.0.13.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-digester-1.8.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-io-2.1.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-lang-2.5.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-logging-1.1.1.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-math-2.1.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-net-3.1.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/guava-11.0.2.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-annotations-2.2.0.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-auth-2.2.0.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-common-2.2.0.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-hdfs-2.2.0.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-core-asl-1.8.8.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-jaxrs-1.8.8.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-mapper-asl-1.8.8.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-xc-1.8.8.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jaxb-api-2.2.2.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jaxb-impl-2.2.3-1.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jersey-core-1.9.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jersey-json-1.9.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jersey-server-1.9.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jettison-1.1.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jsch-0.1.42.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/json-simple-1.1.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jsr305-1.3.9.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/log4j-1.2.17.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/paranamer-2.3.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/protobuf-java-2.5.0.jar create mode 100644 
aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/slf4j-api-1.7.5.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/slf4j-log4j12-1.7.5.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/snappy-java-1.0.4.1.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/stax-api-1.0.1.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/xmlenc-0.52.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/xz-1.0.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/zookeeper-3.4.5.jar create mode 100644 aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/web.xml create mode 100644 aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-app-2.2.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.2.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.2.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-2.2.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-plugins-2.2.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.2.0-tests.jar create mode 100644 aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.2.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-shuffle-2.2.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib-examples/hsqldb-2.0.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/aopalliance-1.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/asm-3.2.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/avro-1.7.4.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/commons-compress-1.4.1.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/commons-io-2.1.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/guice-3.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/guice-servlet-3.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/hadoop-annotations-2.2.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/hamcrest-core-1.1.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/jackson-core-asl-1.8.8.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/jackson-mapper-asl-1.8.8.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/javax.inject-1.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/jersey-core-1.9.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/jersey-guice-1.9.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/jersey-server-1.9.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/junit-4.10.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/log4j-1.2.17.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/netty-3.6.2.Final.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/paranamer-2.3.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/protobuf-java-2.5.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/snappy-java-1.0.4.1.jar create mode 100644 aarch64/share/hadoop/mapreduce/lib/xz-1.0.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-app-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-app-2.2.0-test-sources.jar create mode 100644 
aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-common-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-common-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-core-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-core-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-plugins-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-plugins-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-jobclient-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-jobclient-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-shuffle-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-shuffle-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-examples-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-examples-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/tools/lib/hadoop-archives-2.2.0.jar create mode 100644 aarch64/share/hadoop/tools/lib/hadoop-datajoin-2.2.0.jar create mode 100644 aarch64/share/hadoop/tools/lib/hadoop-distcp-2.2.0.jar create mode 100644 aarch64/share/hadoop/tools/lib/hadoop-extras-2.2.0.jar create mode 100644 aarch64/share/hadoop/tools/lib/hadoop-gridmix-2.2.0.jar create mode 100644 aarch64/share/hadoop/tools/lib/hadoop-rumen-2.2.0.jar create mode 100644 aarch64/share/hadoop/tools/lib/hadoop-streaming-2.2.0.jar create mode 100644 aarch64/share/hadoop/tools/sources/hadoop-archives-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/tools/sources/hadoop-archives-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/tools/sources/hadoop-datajoin-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/tools/sources/hadoop-datajoin-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/tools/sources/hadoop-distcp-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/tools/sources/hadoop-distcp-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/tools/sources/hadoop-extras-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/tools/sources/hadoop-extras-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/tools/sources/hadoop-gridmix-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/tools/sources/hadoop-gridmix-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/tools/sources/hadoop-rumen-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/tools/sources/hadoop-rumen-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/tools/sources/hadoop-streaming-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/tools/sources/hadoop-streaming-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/yarn/hadoop-yarn-api-2.2.0.jar create mode 100644 aarch64/share/hadoop/yarn/hadoop-yarn-applications-distributedshell-2.2.0.jar create mode 100644 
aarch64/share/hadoop/yarn/hadoop-yarn-applications-unmanaged-am-launcher-2.2.0.jar create mode 100644 aarch64/share/hadoop/yarn/hadoop-yarn-client-2.2.0.jar create mode 100644 aarch64/share/hadoop/yarn/hadoop-yarn-common-2.2.0.jar create mode 100644 aarch64/share/hadoop/yarn/hadoop-yarn-server-common-2.2.0.jar create mode 100644 aarch64/share/hadoop/yarn/hadoop-yarn-server-nodemanager-2.2.0.jar create mode 100644 aarch64/share/hadoop/yarn/hadoop-yarn-server-resourcemanager-2.2.0.jar create mode 100644 aarch64/share/hadoop/yarn/hadoop-yarn-server-tests-2.2.0.jar create mode 100644 aarch64/share/hadoop/yarn/hadoop-yarn-server-web-proxy-2.2.0.jar create mode 100644 aarch64/share/hadoop/yarn/hadoop-yarn-site-2.2.0.jar create mode 100644 aarch64/share/hadoop/yarn/lib-examples/hsqldb-2.0.0.jar create mode 100644 aarch64/share/hadoop/yarn/lib/aopalliance-1.0.jar create mode 100644 aarch64/share/hadoop/yarn/lib/asm-3.2.jar create mode 100644 aarch64/share/hadoop/yarn/lib/avro-1.7.4.jar create mode 100644 aarch64/share/hadoop/yarn/lib/commons-compress-1.4.1.jar create mode 100644 aarch64/share/hadoop/yarn/lib/commons-io-2.1.jar create mode 100644 aarch64/share/hadoop/yarn/lib/guice-3.0.jar create mode 100644 aarch64/share/hadoop/yarn/lib/guice-servlet-3.0.jar create mode 100644 aarch64/share/hadoop/yarn/lib/hadoop-annotations-2.2.0.jar create mode 100644 aarch64/share/hadoop/yarn/lib/hamcrest-core-1.1.jar create mode 100644 aarch64/share/hadoop/yarn/lib/jackson-core-asl-1.8.8.jar create mode 100644 aarch64/share/hadoop/yarn/lib/jackson-mapper-asl-1.8.8.jar create mode 100644 aarch64/share/hadoop/yarn/lib/javax.inject-1.jar create mode 100644 aarch64/share/hadoop/yarn/lib/jersey-core-1.9.jar create mode 100644 aarch64/share/hadoop/yarn/lib/jersey-guice-1.9.jar create mode 100644 aarch64/share/hadoop/yarn/lib/jersey-server-1.9.jar create mode 100644 aarch64/share/hadoop/yarn/lib/junit-4.10.jar create mode 100644 aarch64/share/hadoop/yarn/lib/log4j-1.2.17.jar create mode 100644 aarch64/share/hadoop/yarn/lib/netty-3.6.2.Final.jar create mode 100644 aarch64/share/hadoop/yarn/lib/paranamer-2.3.jar create mode 100644 aarch64/share/hadoop/yarn/lib/protobuf-java-2.5.0.jar create mode 100644 aarch64/share/hadoop/yarn/lib/snappy-java-1.0.4.1.jar create mode 100644 aarch64/share/hadoop/yarn/lib/xz-1.0.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-api-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-distributedshell-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-distributedshell-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-unmanaged-am-launcher-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-unmanaged-am-launcher-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-client-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-client-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-common-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-common-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-common-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-common-2.2.0-test-sources.jar create mode 100644 
aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-nodemanager-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-nodemanager-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-resourcemanager-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-resourcemanager-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-tests-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-web-proxy-2.2.0-sources.jar create mode 100644 aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-web-proxy-2.2.0-test-sources.jar create mode 100644 aarch64/share/hadoop/yarn/test/hadoop-yarn-server-tests-2.2.0-tests.jar (limited to 'aarch64') diff --git a/aarch64/bin/container-executor b/aarch64/bin/container-executor new file mode 100755 index 0000000..04f9973 Binary files /dev/null and b/aarch64/bin/container-executor differ diff --git a/aarch64/bin/hadoop b/aarch64/bin/hadoop new file mode 100755 index 0000000..be91771 --- /dev/null +++ b/aarch64/bin/hadoop @@ -0,0 +1,136 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script runs the hadoop core commands. + +bin=`which $0` +bin=`dirname ${bin}` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hadoop-config.sh + +function print_usage(){ + echo "Usage: hadoop [--config confdir] COMMAND" + echo " where COMMAND is one of:" + echo " fs run a generic filesystem user client" + echo " version print the version" + echo " jar run a jar file" + echo " checknative [-a|-h] check native hadoop and compression libraries availability" + echo " distcp copy file or directories recursively" + echo " archive -archiveName NAME -p * create a hadoop archive" + echo " classpath prints the class path needed to get the" + echo " Hadoop jar and the required libraries" + echo " daemonlog get/set the log level for each daemon" + echo " or" + echo " CLASSNAME run the class named CLASSNAME" + echo "" + echo "Most commands print help when invoked w/o parameters." +} + +if [ $# = 0 ]; then + print_usage + exit +fi + +COMMAND=$1 +case $COMMAND in + # usage flags + --help|-help|-h) + print_usage + exit + ;; + + #hdfs commands + namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups|portmap|nfs3) + echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2 + echo "Instead use the hdfs command for it." 1>&2 + echo "" 1>&2 + #try to locate hdfs and if present, delegate to it. 
+ shift + if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then + exec "${HADOOP_HDFS_HOME}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@" + elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then + exec "${HADOOP_PREFIX}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@" + else + echo "HADOOP_HDFS_HOME not found!" + exit 1 + fi + ;; + + #mapred commands for backwards compatibility + pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker) + echo "DEPRECATED: Use of this script to execute mapred command is deprecated." 1>&2 + echo "Instead use the mapred command for it." 1>&2 + echo "" 1>&2 + #try to locate mapred and if present, delegate to it. + shift + if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then + exec "${HADOOP_MAPRED_HOME}"/bin/mapred ${COMMAND/mrgroups/groups} "$@" + elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then + exec "${HADOOP_PREFIX}"/bin/mapred ${COMMAND/mrgroups/groups} "$@" + else + echo "HADOOP_MAPRED_HOME not found!" + exit 1 + fi + ;; + + classpath) + echo $CLASSPATH + exit + ;; + + #core commands + *) + # the core commands + if [ "$COMMAND" = "fs" ] ; then + CLASS=org.apache.hadoop.fs.FsShell + elif [ "$COMMAND" = "version" ] ; then + CLASS=org.apache.hadoop.util.VersionInfo + elif [ "$COMMAND" = "jar" ] ; then + CLASS=org.apache.hadoop.util.RunJar + elif [ "$COMMAND" = "checknative" ] ; then + CLASS=org.apache.hadoop.util.NativeLibraryChecker + elif [ "$COMMAND" = "distcp" ] ; then + CLASS=org.apache.hadoop.tools.DistCp + CLASSPATH=${CLASSPATH}:${TOOL_PATH} + elif [ "$COMMAND" = "daemonlog" ] ; then + CLASS=org.apache.hadoop.log.LogLevel + elif [ "$COMMAND" = "archive" ] ; then + CLASS=org.apache.hadoop.tools.HadoopArchives + CLASSPATH=${CLASSPATH}:${TOOL_PATH} + elif [[ "$COMMAND" = -* ]] ; then + # class and package names cannot begin with a - + echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'" + exit 1 + else + CLASS=$COMMAND + fi + shift + + # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" + + #make sure security appender is turned off + HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}" + + export CLASSPATH=$CLASSPATH + exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@" + ;; + +esac diff --git a/aarch64/bin/hadoop.cmd b/aarch64/bin/hadoop.cmd new file mode 100755 index 0000000..63b2945 --- /dev/null +++ b/aarch64/bin/hadoop.cmd @@ -0,0 +1,240 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + + +@rem This script runs the hadoop core commands. + +@rem Environment Variables +@rem +@rem JAVA_HOME The java implementation to use. Overrides JAVA_HOME. +@rem +@rem HADOOP_CLASSPATH Extra Java CLASSPATH entries. 
+@rem +@rem HADOOP_USER_CLASSPATH_FIRST When defined, the HADOOP_CLASSPATH is +@rem added in the beginning of the global +@rem classpath. Can be defined, for example, +@rem by doing +@rem export HADOOP_USER_CLASSPATH_FIRST=true +@rem +@rem HADOOP_HEAPSIZE The maximum amount of heap to use, in MB. +@rem Default is 1000. +@rem +@rem HADOOP_OPTS Extra Java runtime options. +@rem +@rem HADOOP_CLIENT_OPTS when the respective command is run. +@rem HADOOP_{COMMAND}_OPTS etc HADOOP_JT_OPTS applies to JobTracker +@rem for e.g. HADOOP_CLIENT_OPTS applies to +@rem more than one command (fs, dfs, fsck, +@rem dfsadmin etc) +@rem +@rem HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_HOME}/conf. +@rem +@rem HADOOP_ROOT_LOGGER The root appender. Default is INFO,console +@rem + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +call :updatepath %HADOOP_BIN_PATH% + +:main + setlocal enabledelayedexpansion + + set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec + if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% + ) + + call %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd %* + if "%1" == "--config" ( + shift + shift + ) + + set hadoop-command=%1 + if not defined hadoop-command ( + goto print_usage + ) + + call :make_command_arguments %* + + set hdfscommands=namenode secondarynamenode datanode dfs dfsadmin fsck balancer fetchdt oiv dfsgroups + for %%i in ( %hdfscommands% ) do ( + if %hadoop-command% == %%i set hdfscommand=true + ) + if defined hdfscommand ( + @echo DEPRECATED: Use of this script to execute hdfs command is deprecated. 1>&2 + @echo Instead use the hdfs command for it. 1>&2 + if exist %HADOOP_HDFS_HOME%\bin\hdfs.cmd ( + call %HADOOP_HDFS_HOME%\bin\hdfs.cmd %* + goto :eof + ) else if exist %HADOOP_HOME%\bin\hdfs.cmd ( + call %HADOOP_HOME%\bin\hdfs.cmd %* + goto :eof + ) else ( + echo HADOOP_HDFS_HOME not found! + goto :eof + ) + ) + + set mapredcommands=pipes job queue mrgroups mradmin jobtracker tasktracker + for %%i in ( %mapredcommands% ) do ( + if %hadoop-command% == %%i set mapredcommand=true + ) + if defined mapredcommand ( + @echo DEPRECATED: Use of this script to execute mapred command is deprecated. 1>&2 + @echo Instead use the mapred command for it. 1>&2 + if exist %HADOOP_MAPRED_HOME%\bin\mapred.cmd ( + call %HADOOP_MAPRED_HOME%\bin\mapred.cmd %* + goto :eof + ) else if exist %HADOOP_HOME%\bin\mapred.cmd ( + call %HADOOP_HOME%\bin\mapred.cmd %* + goto :eof + ) else ( + echo HADOOP_MAPRED_HOME not found! 
+ goto :eof + ) + ) + + if %hadoop-command% == classpath ( + @echo %CLASSPATH% + goto :eof + ) + + set corecommands=fs version jar checknative distcp daemonlog archive + for %%i in ( %corecommands% ) do ( + if %hadoop-command% == %%i set corecommand=true + ) + if defined corecommand ( + call :%hadoop-command% + ) else ( + set CLASSPATH=%CLASSPATH%;%CD% + set CLASS=%hadoop-command% + ) + + set path=%PATH%;%HADOOP_BIN_PATH% + + @rem Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_CLIENT_OPTS% + + @rem make sure security appender is turned off + if not defined HADOOP_SECURITY_LOGGER ( + set HADOOP_SECURITY_LOGGER=INFO,NullAppender + ) + set HADOOP_OPTS=%HADOOP_OPTS% -Dhadoop.security.logger=%HADOOP_SECURITY_LOGGER% + + call %JAVA% %JAVA_HEAP_MAX% %HADOOP_OPTS% -classpath %CLASSPATH% %CLASS% %hadoop-command-arguments% + + goto :eof + +:fs + set CLASS=org.apache.hadoop.fs.FsShell + goto :eof + +:version + set CLASS=org.apache.hadoop.util.VersionInfo + goto :eof + +:jar + set CLASS=org.apache.hadoop.util.RunJar + goto :eof + +:checknative + set CLASS=org.apache.hadoop.util.NativeLibraryChecker + goto :eof + +:distcp + set CLASS=org.apache.hadoop.tools.DistCp + set CLASSPATH=%CLASSPATH%;%TOOL_PATH% + goto :eof + +:daemonlog + set CLASS=org.apache.hadoop.log.LogLevel + goto :eof + +:archive + set CLASS=org.apache.hadoop.tools.HadoopArchives + set CLASSPATH=%CLASSPATH%;%TOOL_PATH% + goto :eof + +:updatepath + set path_to_add=%* + set current_path_comparable=%path% + set current_path_comparable=%current_path_comparable: =_% + set current_path_comparable=%current_path_comparable:(=_% + set current_path_comparable=%current_path_comparable:)=_% + set path_to_add_comparable=%path_to_add% + set path_to_add_comparable=%path_to_add_comparable: =_% + set path_to_add_comparable=%path_to_add_comparable:(=_% + set path_to_add_comparable=%path_to_add_comparable:)=_% + + for %%i in ( %current_path_comparable% ) do ( + if /i "%%i" == "%path_to_add_comparable%" ( + set path_to_add_exist=true + ) + ) + set system_path_comparable= + set path_to_add_comparable= + if not defined path_to_add_exist path=%path_to_add%;%path% + set path_to_add= + goto :eof + +@rem This changes %1, %2 etc. Hence those cannot be used after calling this. +:make_command_arguments + if "%1" == "--config" ( + shift + shift + ) + if [%2] == [] goto :eof + shift + set _arguments= + :MakeCmdArgsLoop + if [%1]==[] goto :EndLoop + + if not defined _arguments ( + set _arguments=%1 + ) else ( + set _arguments=!_arguments! %1 + ) + shift + goto :MakeCmdArgsLoop + :EndLoop + set hadoop-command-arguments=%_arguments% + goto :eof + +:print_usage + @echo Usage: hadoop [--config confdir] COMMAND + @echo where COMMAND is one of: + @echo fs run a generic filesystem user client + @echo version print the version + @echo jar ^<jar^> run a jar file + @echo checknative [-a^|-h] check native hadoop and compression libraries availability + @echo distcp ^<srcurl^> ^<desturl^> copy file or directories recursively + @echo archive -archiveName NAME -p ^<parent path^> ^<src^>* ^<dest^> create a hadoop archive + @echo classpath prints the class path needed to get the + @echo Hadoop jar and the required libraries + @echo daemonlog get/set the log level for each daemon + @echo or + @echo CLASSNAME run the class named CLASSNAME + @echo. + @echo Most commands print help when invoked w/o parameters. 
+ +endlocal diff --git a/aarch64/bin/hdfs b/aarch64/bin/hdfs new file mode 100755 index 0000000..24bb11f --- /dev/null +++ b/aarch64/bin/hdfs @@ -0,0 +1,203 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Environment Variables +# +# JSVC_HOME home directory of jsvc binary. Required for starting secure +# datanode. +# +# JSVC_OUTFILE path to jsvc output file. Defaults to +# $HADOOP_LOG_DIR/jsvc.out. +# +# JSVC_ERRFILE path to jsvc error file. Defaults to $HADOOP_LOG_DIR/jsvc.err. + +bin=`which $0` +bin=`dirname ${bin}` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hdfs-config.sh + +function print_usage(){ + echo "Usage: hdfs [--config confdir] COMMAND" + echo " where COMMAND is one of:" + echo " dfs run a filesystem command on the file systems supported in Hadoop." + echo " namenode -format format the DFS filesystem" + echo " secondarynamenode run the DFS secondary namenode" + echo " namenode run the DFS namenode" + echo " journalnode run the DFS journalnode" + echo " zkfc run the ZK Failover Controller daemon" + echo " datanode run a DFS datanode" + echo " dfsadmin run a DFS admin client" + echo " haadmin run a DFS HA admin client" + echo " fsck run a DFS filesystem checking utility" + echo " balancer run a cluster balancing utility" + echo " jmxget get JMX exported values from NameNode or DataNode." + echo " oiv apply the offline fsimage viewer to an fsimage" + echo " oev apply the offline edits viewer to an edits file" + echo " fetchdt fetch a delegation token from the NameNode" + echo " getconf get config values from configuration" + echo " groups get the groups which users belong to" + echo " snapshotDiff diff two snapshots of a directory or diff the" + echo " current directory contents with a snapshot" + echo " lsSnapshottableDir list all snapshottable dirs owned by the current user" + echo " Use -help to see options" + echo " portmap run a portmap service" + echo " nfs3 run an NFS version 3 gateway" + echo "" + echo "Most commands print help when invoked w/o parameters." 
+} + +if [ $# = 0 ]; then + print_usage + exit +fi + +COMMAND=$1 +shift + +case $COMMAND in + # usage flags + --help|-help|-h) + print_usage + exit + ;; +esac + +# Determine if we're starting a secure datanode, and if so, redefine appropriate variables +if [ "$COMMAND" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then + if [ -n "$JSVC_HOME" ]; then + if [ -n "$HADOOP_SECURE_DN_PID_DIR" ]; then + HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR + fi + + if [ -n "$HADOOP_SECURE_DN_LOG_DIR" ]; then + HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR + HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR" + fi + + HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER + HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING" + starting_secure_dn="true" + else + echo "It looks like you're trying to start a secure DN, but \$JSVC_HOME"\ + "isn't set. Falling back to starting insecure DN." + fi +fi + +if [ "$COMMAND" = "namenode" ] ; then + CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode' + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NAMENODE_OPTS" +elif [ "$COMMAND" = "zkfc" ] ; then + CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController' + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_ZKFC_OPTS" +elif [ "$COMMAND" = "secondarynamenode" ] ; then + CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode' + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_SECONDARYNAMENODE_OPTS" +elif [ "$COMMAND" = "datanode" ] ; then + CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode' + if [ "$starting_secure_dn" = "true" ]; then + HADOOP_OPTS="$HADOOP_OPTS -jvm server $HADOOP_DATANODE_OPTS" + else + HADOOP_OPTS="$HADOOP_OPTS -server $HADOOP_DATANODE_OPTS" + fi +elif [ "$COMMAND" = "journalnode" ] ; then + CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode' + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOURNALNODE_OPTS" +elif [ "$COMMAND" = "dfs" ] ; then + CLASS=org.apache.hadoop.fs.FsShell + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" +elif [ "$COMMAND" = "dfsadmin" ] ; then + CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" +elif [ "$COMMAND" = "haadmin" ] ; then + CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin + CLASSPATH=${CLASSPATH}:${TOOL_PATH} + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" +elif [ "$COMMAND" = "fsck" ] ; then + CLASS=org.apache.hadoop.hdfs.tools.DFSck + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" +elif [ "$COMMAND" = "balancer" ] ; then + CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS" +elif [ "$COMMAND" = "jmxget" ] ; then + CLASS=org.apache.hadoop.hdfs.tools.JMXGet +elif [ "$COMMAND" = "oiv" ] ; then + CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer +elif [ "$COMMAND" = "oev" ] ; then + CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer +elif [ "$COMMAND" = "fetchdt" ] ; then + CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher +elif [ "$COMMAND" = "getconf" ] ; then + CLASS=org.apache.hadoop.hdfs.tools.GetConf +elif [ "$COMMAND" = "groups" ] ; then + CLASS=org.apache.hadoop.hdfs.tools.GetGroups +elif [ "$COMMAND" = "snapshotDiff" ] ; then + CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff +elif [ "$COMMAND" = "lsSnapshottableDir" ] ; then + CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir +elif [ "$COMMAND" = "portmap" ] ; then + CLASS=org.apache.hadoop.portmap.Portmap +elif [ "$COMMAND" = "nfs3" ] ; then + CLASS=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3 +else + CLASS="$COMMAND" +fi + +export 
CLASSPATH=$CLASSPATH + +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}" + +# Check to see if we should start a secure datanode +if [ "$starting_secure_dn" = "true" ]; then + if [ "$HADOOP_PID_DIR" = "" ]; then + HADOOP_SECURE_DN_PID="/tmp/hadoop_secure_dn.pid" + else + HADOOP_SECURE_DN_PID="$HADOOP_PID_DIR/hadoop_secure_dn.pid" + fi + + JSVC=$JSVC_HOME/jsvc + if [ ! -f $JSVC ]; then + echo "JSVC_HOME is not set correctly so jsvc cannot be found. Jsvc is required to run secure datanodes. " + echo "Please download and install jsvc from http://archive.apache.org/dist/commons/daemon/binaries/ "\ + "and set JSVC_HOME to the directory containing the jsvc binary." + exit + fi + + if [[ ! $JSVC_OUTFILE ]]; then + JSVC_OUTFILE="$HADOOP_LOG_DIR/jsvc.out" + fi + + if [[ ! $JSVC_ERRFILE ]]; then + JSVC_ERRFILE="$HADOOP_LOG_DIR/jsvc.err" + fi + + exec "$JSVC" \ + -Dproc_$COMMAND -outfile "$JSVC_OUTFILE" \ + -errfile "$JSVC_ERRFILE" \ + -pidfile "$HADOOP_SECURE_DN_PID" \ + -nodetach \ + -user "$HADOOP_SECURE_DN_USER" \ + -cp "$CLASSPATH" \ + $JAVA_HEAP_MAX $HADOOP_OPTS \ + org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter "$@" +else + # run it + exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@" +fi + diff --git a/aarch64/bin/hdfs.cmd b/aarch64/bin/hdfs.cmd new file mode 100755 index 0000000..70af80c --- /dev/null +++ b/aarch64/bin/hdfs.cmd @@ -0,0 +1,171 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+@rem +setlocal enabledelayedexpansion + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +call %HADOOP_LIBEXEC_DIR%\hdfs-config.cmd %* +if "%1" == "--config" ( + shift + shift +) + +:main + if exist %HADOOP_CONF_DIR%\hadoop-env.cmd ( + call %HADOOP_CONF_DIR%\hadoop-env.cmd + ) + + set hdfs-command=%1 + call :make_command_arguments %* + + if not defined hdfs-command ( + goto print_usage + ) + + call :%hdfs-command% %hdfs-command-arguments% + set java_arguments=%JAVA_HEAP_MAX% %HADOOP_OPTS% -classpath %CLASSPATH% %CLASS% %hdfs-command-arguments% + call %JAVA% %java_arguments% + +goto :eof + +:namenode + set CLASS=org.apache.hadoop.hdfs.server.namenode.NameNode + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_NAMENODE_OPTS% + goto :eof + +:zkfc + set CLASS=org.apache.hadoop.hdfs.tools.DFSZKFailoverController + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_ZKFC_OPTS% + goto :eof + +:secondarynamenode + set CLASS=org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_SECONDARYNAMENODE_OPTS% + goto :eof + +:datanode + set CLASS=org.apache.hadoop.hdfs.server.datanode.DataNode + set HADOOP_OPTS=%HADOOP_OPTS% -server %HADOOP_DATANODE_OPTS% + goto :eof + +:dfs + set CLASS=org.apache.hadoop.fs.FsShell + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_CLIENT_OPTS% + goto :eof + +:dfsadmin + set CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_CLIENT_OPTS% + goto :eof + +:haadmin + set CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin + set CLASSPATH=%CLASSPATH%;%TOOL_PATH% + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_CLIENT_OPTS% + goto :eof + +:fsck + set CLASS=org.apache.hadoop.hdfs.tools.DFSck + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_CLIENT_OPTS% + goto :eof + +:balancer + set CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_BALANCER_OPTS% + goto :eof + +:jmxget + set CLASS=org.apache.hadoop.hdfs.tools.JMXGet + goto :eof + +:oiv + set CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer + goto :eof + +:oev + set CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer + goto :eof + +:fetchdt + set CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher + goto :eof + +:getconf + set CLASS=org.apache.hadoop.hdfs.tools.GetConf + goto :eof + +:groups + set CLASS=org.apache.hadoop.hdfs.tools.GetGroups + goto :eof + +@rem This changes %1, %2 etc. Hence those cannot be used after calling this. +:make_command_arguments + if "%1" == "--config" ( + shift + shift + ) + if [%2] == [] goto :eof + shift + set _hdfsarguments= + :MakeCmdArgsLoop + if [%1]==[] goto :EndLoop + + if not defined _hdfsarguments ( + set _hdfsarguments=%1 + ) else ( + set _hdfsarguments=!_hdfsarguments! %1 + ) + shift + goto :MakeCmdArgsLoop + :EndLoop + set hdfs-command-arguments=%_hdfsarguments% + goto :eof + +:print_usage + @echo Usage: hdfs [--config confdir] COMMAND + @echo where COMMAND is one of: + @echo dfs run a filesystem command on the file systems supported in Hadoop. 
+ @echo namenode -format format the DFS filesystem + @echo secondarynamenode run the DFS secondary namenode + @echo namenode run the DFS namenode + @echo zkfc run the ZK Failover Controller daemon + @echo datanode run a DFS datanode + @echo dfsadmin run a DFS admin client + @echo fsck run a DFS filesystem checking utility + @echo balancer run a cluster balancing utility + @echo jmxget get JMX exported values from NameNode or DataNode. + @echo oiv apply the offline fsimage viewer to an fsimage + @echo oev apply the offline edits viewer to an edits file + @echo fetchdt fetch a delegation token from the NameNode + @echo getconf get config values from configuration + @echo groups get the groups which users belong to + @echo Use -help to see options + @echo. + @echo Most commands print help when invoked w/o parameters. + +endlocal diff --git a/aarch64/bin/mapred b/aarch64/bin/mapred new file mode 100755 index 0000000..531fd95 --- /dev/null +++ b/aarch64/bin/mapred @@ -0,0 +1,148 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +bin=`which $0` +bin=`dirname ${bin}` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +if [ -e ${HADOOP_LIBEXEC_DIR}/mapred-config.sh ]; then + . ${HADOOP_LIBEXEC_DIR}/mapred-config.sh +else + . "$bin/mapred-config.sh" +fi + +function print_usage(){ + echo "Usage: mapred [--config confdir] COMMAND" + echo " where COMMAND is one of:" + echo " pipes run a Pipes job" + echo " job manipulate MapReduce jobs" + echo " queue get information regarding JobQueues" + echo " classpath prints the class path needed for running" + echo " mapreduce subcommands" + echo " historyserver run job history servers as a standalone daemon" + echo " distcp copy file or directories recursively" + echo " archive -archiveName NAME -p * create a hadoop archive" + echo "" + echo "Most commands print help when invoked w/o parameters." 
+} + +if [ $# = 0 ]; then + print_usage + exit +fi + +COMMAND=$1 +shift + +case $COMMAND in + # usage flags + --help|-help|-h) + print_usage + exit + ;; +esac + +if [ "$COMMAND" = "job" ] ; then + CLASS=org.apache.hadoop.mapred.JobClient + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" +elif [ "$COMMAND" = "queue" ] ; then + CLASS=org.apache.hadoop.mapred.JobQueueClient + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" +elif [ "$COMMAND" = "pipes" ] ; then + CLASS=org.apache.hadoop.mapred.pipes.Submitter + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" +elif [ "$COMMAND" = "sampler" ] ; then + CLASS=org.apache.hadoop.mapred.lib.InputSampler + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" +elif [ "$COMMAND" = "classpath" ] ; then + echo -n +elif [ "$COMMAND" = "historyserver" ] ; then + CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer + HADOOP_OPTS="$HADOOP_OPTS -Dmapred.jobsummary.logger=${HADOOP_JHS_LOGGER:-INFO,console} $HADOOP_JOB_HISTORYSERVER_OPTS" + if [ "$HADOOP_JOB_HISTORYSERVER_HEAPSIZE" != "" ]; then + JAVA_HEAP_MAX="-Xmx""$HADOOP_JOB_HISTORYSERVER_HEAPSIZE""m" + fi +elif [ "$COMMAND" = "mradmin" ] \ + || [ "$COMMAND" = "jobtracker" ] \ + || [ "$COMMAND" = "tasktracker" ] \ + || [ "$COMMAND" = "groups" ] ; then + echo "Sorry, the $COMMAND command is no longer supported." + echo "You may find similar functionality with the \"yarn\" shell command." + print_usage + exit +elif [ "$COMMAND" = "distcp" ] ; then + CLASS=org.apache.hadoop.tools.DistCp + CLASSPATH=${CLASSPATH}:${TOOL_PATH} + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" +elif [ "$COMMAND" = "archive" ] ; then + CLASS=org.apache.hadoop.tools.HadoopArchives + CLASSPATH=${CLASSPATH}:${TOOL_PATH} + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" +else + echo $COMMAND - invalid command + print_usage + exit +fi + +# for developers, add mapred classes to CLASSPATH +if [ -d "$HADOOP_MAPRED_HOME/build/classes" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/classes +fi +if [ -d "$HADOOP_MAPRED_HOME/build/webapps" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build +fi +if [ -d "$HADOOP_MAPRED_HOME/build/test/classes" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/test/classes +fi +if [ -d "$HADOOP_MAPRED_HOME/build/tools" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/tools +fi + +# for releases, add core mapred jar & webapps to CLASSPATH +if [ -d "$HADOOP_PREFIX/${MAPRED_DIR}/webapps" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/${MAPRED_DIR} +fi +for f in $HADOOP_MAPRED_HOME/${MAPRED_DIR}/*.jar; do + CLASSPATH=${CLASSPATH}:$f; +done + +# Need YARN jars also +for f in $HADOOP_YARN_HOME/${YARN_DIR}/*.jar; do + CLASSPATH=${CLASSPATH}:$f; +done + +# add libs to CLASSPATH +for f in $HADOOP_MAPRED_HOME/${MAPRED_LIB_JARS_DIR}/*.jar; do + CLASSPATH=${CLASSPATH}:$f; +done + +# add modules to CLASSPATH +for f in $HADOOP_MAPRED_HOME/modules/*.jar; do + CLASSPATH=${CLASSPATH}:$f; +done + +if [ "$COMMAND" = "classpath" ] ; then + echo $CLASSPATH + exit +fi + +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}" + +export CLASSPATH +exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@" diff --git a/aarch64/bin/mapred.cmd b/aarch64/bin/mapred.cmd new file mode 100755 index 0000000..b2d53fa --- /dev/null +++ b/aarch64/bin/mapred.cmd @@ -0,0 +1,195 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. 
See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +@rem The Hadoop mapred command script + +setlocal enabledelayedexpansion + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~`%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +call %DEFAULT_LIBEXEC_DIR%\mapred-config.cmd %* +if "%1" == "--config" ( + shift + shift +) + +:main + if exist %MAPRED_CONF_DIR%\mapred-env.cmd ( + call %MAPRED_CONF_DIR%\mapred-env.cmd + ) + set mapred-command=%1 + call :make_command_arguments %* + + if not defined mapred-command ( + goto print_usage + ) + + @rem JAVA and JAVA_HEAP_MAX are set in hadoop-confg.cmd + + if defined MAPRED_HEAPSIZE ( + @rem echo run with Java heapsize %MAPRED_HEAPSIZE% + set JAVA_HEAP_SIZE=-Xmx%MAPRED_HEAPSIZE%m + ) + + @rem CLASSPATH initially contains HADOOP_CONF_DIR and MAPRED_CONF_DIR + if not defined HADOOP_CONF_DIR ( + echo NO HADOOP_CONF_DIR set. + echo Please specify it either in mapred-env.cmd or in the environment. 
+ goto :eof + ) + + set CLASSPATH=%HADOOP_CONF_DIR%;%MAPRED_CONF_DIR%;%CLASSPATH% + + @rem for developers, add Hadoop classes to CLASSPATH + if exist %HADOOP_MAPRED_HOME%\build\classes ( + set CLASSPATH=%CLASSPATH%;%HADOOP_MAPRED_HOME%\build\classes + ) + + if exist %HADOOP_MAPRED_HOME%\build\webapps ( + set CLASSPATH=%CLASSPATH%;%HADOOP_MAPRED_HOME%\build + ) + + if exist %HADOOP_MAPRED_HOME%\build\test\classes ( + set CLASSPATH=%CLASSPATH%;%HADOOP_MAPRED_HOME%\build\test\classes + ) + + if exist %HADOOP_MAPRED_HOME%\build\tools ( + set CLASSPATH=%CLASSPATH%;%HADOOP_MAPRED_HOME%\build\tools + ) + + @rem Need YARN jars also + set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\%YARN_DIR%\* + + @rem add libs to CLASSPATH + set CLASSPATH=%CLASSPATH%;%HADOOP_MAPRED_HOME%\%MAPRED_LIB_JARS_DIR%\* + + @rem add modules to CLASSPATH + set CLASSPATH=%CLASSPATH%;%HADOOP_MAPRED_HOME%\modules\* + + call :%mapred-command% %mapred-command-arguments% + set java_arguments=%JAVA_HEAP_MAX% %MAPRED_OPTS% -classpath %CLASSPATH% %CLASS% %mapred-command-arguments% + call %JAVA% %java_arguments% + +goto :eof + + +:classpath + @echo %CLASSPATH% + goto :eof + +:job + set CLASS=org.apache.hadoop.mapred.JobClient + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_CLIENT_OPTS% + goto :eof + +:queue + set CLASS=org.apache.hadoop.mapred.JobQueueClient + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_CLIENT_OPTS% + goto :eof + +:sampler + set CLASS=org.apache.hadoop.mapred.lib.InputSampler + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_CLIENT_OPTS% + goto :eof + +:historyserver + set CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer + set HADOOP_OPTS=%HADOOP_OPTS% -Dmapred.jobsummary.logger=%HADOOP_JHS_LOGGER% %HADOOP_JOB_HISTORYSERVER_OPTS%" + if defined HADOOP_JOB_HISTORYSERVER_HEAPSIZE ( + set JAVA_HEAP_MAX=-Xmx%HADOOP_JOB_HISTORYSERVER_HEAPSIZE%m + ) + goto :eof + +:distcp + set CLASS=org.apache.hadoop.tools.DistCp + set CLASSPATH=%CLASSPATH%;%TOO_PATH% + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_CLIENT_OPTS% + goto :eof + +:archive + set CLASS=org.apache.hadop.tools.HadoopArchives + set CLASSPATH=%CLASSPATH%;%TOO_PATH% + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_CLIENT_OPTS% + +:pipes + goto not_supported + +:mradmin + goto not_supported + +:jobtracker + goto not_supported + +:tasktracker + goto not_supported + +:groups + goto not_supported + + +@rem This changes %1, %2 etc. Hence those cannot be used after calling this. +:make_command_arguments + if [%2] == [] goto :eof + if "%1" == "--config" ( + shift + shift + ) + shift + set _mapredarguments= + :MakeCmdArgsLoop + if [%1]==[] goto :EndLoop + + if not defined _mapredarguments ( + set _mapredarguments=%1 + ) else ( + set _mapredarguments=!_mapredarguments! %1 + ) + shift + goto :MakeCmdArgsLoop + :EndLoop + set mapred-command-arguments=%_mapredarguments% + goto :eof + +:not_supported + @echo Sorry, the %COMMAND% command is no longer supported. + @echo You may find similar functionality with the "yarn" shell command. + goto print_usage + +:print_usage + @echo Usage: mapred [--config confdir] COMMAND + @echo where COMMAND is one of: + @echo job manipulate MapReduce jobs + @echo queue get information regarding JobQueues + @echo classpath prints the class path needed for running + @echo mapreduce subcommands + @echo historyserver run job history servers as a standalone daemon + @echo distcp ^ ^ copy file or directories recursively + @echo archive -archiveName NAME -p ^ ^* ^ create a hadoop archive + @echo + @echo Most commands print help when invoked w/o parameters. 
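+  @rem Illustrative invocations on Windows (a sketch only, not part of the upstream
+  @rem usage text; assumes hadoop-config.cmd has already set JAVA and the classpath):
+  @rem   mapred job -list
+  @rem   mapred historyserver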
+ +endlocal diff --git a/aarch64/bin/rcc b/aarch64/bin/rcc new file mode 100755 index 0000000..22bffff --- /dev/null +++ b/aarch64/bin/rcc @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# The Hadoop record compiler +# +# Environment Variables +# +# JAVA_HOME The java implementation to use. Overrides JAVA_HOME. +# +# HADOOP_OPTS Extra Java runtime options. +# +# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_PREFIX}/conf. +# + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hadoop-config.sh + +if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then + . "${HADOOP_CONF_DIR}/hadoop-env.sh" +fi + +# some Java parameters +if [ "$JAVA_HOME" != "" ]; then + #echo "run java in $JAVA_HOME" + JAVA_HOME=$JAVA_HOME +fi + +if [ "$JAVA_HOME" = "" ]; then + echo "Error: JAVA_HOME is not set." + exit 1 +fi + +JAVA=$JAVA_HOME/bin/java +JAVA_HEAP_MAX=-Xmx1000m + +# restore ordinary behaviour +unset IFS + +CLASS='org.apache.hadoop.record.compiler.generated.Rcc' + +# run it +exec "$JAVA" $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@" diff --git a/aarch64/bin/test-container-executor b/aarch64/bin/test-container-executor new file mode 100755 index 0000000..e2992cf Binary files /dev/null and b/aarch64/bin/test-container-executor differ diff --git a/aarch64/bin/yarn b/aarch64/bin/yarn new file mode 100755 index 0000000..8d907be --- /dev/null +++ b/aarch64/bin/yarn @@ -0,0 +1,235 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# The Hadoop command script +# +# Environment Variables +# +# JAVA_HOME The java implementation to use. Overrides JAVA_HOME. +# +# YARN_CLASSPATH Extra Java CLASSPATH entries. +# +# YARN_HEAPSIZE The maximum amount of heap to use, in MB. +# Default is 1000. +# +# YARN_{COMMAND}_HEAPSIZE overrides YARN_HEAPSIZE for a given command +# eg YARN_NODEMANAGER_HEAPSIZE sets the heap +# size for the NodeManager. 
If you set the +# heap size in YARN_{COMMAND}_OPTS or YARN_OPTS +# they take precedence. +# +# YARN_OPTS Extra Java runtime options. +# +# YARN_CLIENT_OPTS when the respective command is run. +# YARN_{COMMAND}_OPTS etc YARN_NODEMANAGER_OPTS applies to NodeManager +# for e.g. YARN_CLIENT_OPTS applies to +# more than one command (fs, dfs, fsck, +# dfsadmin etc) +# +# YARN_CONF_DIR Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf. +# +# YARN_ROOT_LOGGER The root appender. Default is INFO,console +# + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/yarn-config.sh + +function print_usage(){ + echo "Usage: yarn [--config confdir] COMMAND" + echo "where COMMAND is one of:" + echo " resourcemanager run the ResourceManager" + echo " nodemanager run a nodemanager on each slave" + echo " rmadmin admin tools" + echo " version print the version" + echo " jar run a jar file" + echo " application prints application(s) report/kill application" + echo " node prints node report(s)" + echo " logs dump container logs" + echo " classpath prints the class path needed to get the" + echo " Hadoop jar and the required libraries" + echo " daemonlog get/set the log level for each daemon" + echo " or" + echo " CLASSNAME run the class named CLASSNAME" + echo "Most commands print help when invoked w/o parameters." +} + +# if no args specified, show usage +if [ $# = 0 ]; then + print_usage + exit 1 +fi + +# get arguments +COMMAND=$1 +shift + +case $COMMAND in + # usage flags + --help|-help|-h) + print_usage + exit + ;; +esac + +if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then + . "${YARN_CONF_DIR}/yarn-env.sh" +fi + +# some Java parameters +if [ "$JAVA_HOME" != "" ]; then + #echo "run java in $JAVA_HOME" + JAVA_HOME=$JAVA_HOME +fi + +if [ "$JAVA_HOME" = "" ]; then + echo "Error: JAVA_HOME is not set." + exit 1 +fi + +JAVA=$JAVA_HOME/bin/java +JAVA_HEAP_MAX=-Xmx1000m + +# check envvars which might override default args +if [ "$YARN_HEAPSIZE" != "" ]; then + #echo "run with heapsize $YARN_HEAPSIZE" + JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m" + #echo $JAVA_HEAP_MAX +fi + +# CLASSPATH initially contains $HADOOP_CONF_DIR & $YARN_CONF_DIR +if [ ! -d "$HADOOP_CONF_DIR" ]; then + echo No HADOOP_CONF_DIR set. + echo Please specify it either in yarn-env.sh or in the environment. 
+ exit 1 +fi + +CLASSPATH="${HADOOP_CONF_DIR}:${YARN_CONF_DIR}:${CLASSPATH}" + +# for developers, add Hadoop classes to CLASSPATH +if [ -d "$HADOOP_YARN_HOME/yarn-api/target/classes" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-api/target/classes +fi +if [ -d "$HADOOP_YARN_HOME/yarn-common/target/classes" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-common/target/classes +fi +if [ -d "$HADOOP_YARN_HOME/yarn-mapreduce/target/classes" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-mapreduce/target/classes +fi +if [ -d "$HADOOP_YARN_HOME/yarn-master-worker/target/classes" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-master-worker/target/classes +fi +if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-nodemanager/target/classes" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-nodemanager/target/classes +fi +if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-common/target/classes" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-common/target/classes +fi +if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-resourcemanager/target/classes" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-resourcemanager/target/classes +fi +if [ -d "$HADOOP_YARN_HOME/build/test/classes" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/target/test/classes +fi +if [ -d "$HADOOP_YARN_HOME/build/tools" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/build/tools +fi + +CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/${YARN_DIR}/* +CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/${YARN_LIB_JARS_DIR}/* + +# so that filenames w/ spaces are handled correctly in loops below +IFS= + +# default log directory & file +if [ "$YARN_LOG_DIR" = "" ]; then + YARN_LOG_DIR="$HADOOP_YARN_HOME/logs" +fi +if [ "$YARN_LOGFILE" = "" ]; then + YARN_LOGFILE='yarn.log' +fi + +# restore ordinary behaviour +unset IFS + +# figure out which class to run +if [ "$COMMAND" = "classpath" ] ; then + echo $CLASSPATH + exit +elif [ "$COMMAND" = "rmadmin" ] ; then + CLASS='org.apache.hadoop.yarn.client.cli.RMAdminCLI' + YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS" +elif [ "$COMMAND" = "application" ] ; then + CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI + YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS" +elif [ "$COMMAND" = "node" ] ; then + CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI + YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS" +elif [ "$COMMAND" = "resourcemanager" ] ; then + CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/rm-config/log4j.properties + CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager' + YARN_OPTS="$YARN_OPTS $YARN_RESOURCEMANAGER_OPTS" + if [ "$YARN_RESOURCEMANAGER_HEAPSIZE" != "" ]; then + JAVA_HEAP_MAX="-Xmx""$YARN_RESOURCEMANAGER_HEAPSIZE""m" + fi +elif [ "$COMMAND" = "nodemanager" ] ; then + CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/nm-config/log4j.properties + CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager' + YARN_OPTS="$YARN_OPTS -server $YARN_NODEMANAGER_OPTS" + if [ "$YARN_NODEMANAGER_HEAPSIZE" != "" ]; then + JAVA_HEAP_MAX="-Xmx""$YARN_NODEMANAGER_HEAPSIZE""m" + fi +elif [ "$COMMAND" = "proxyserver" ] ; then + CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer' + YARN_OPTS="$YARN_OPTS $YARN_PROXYSERVER_OPTS" + if [ "$YARN_PROXYSERVER_HEAPSIZE" != "" ]; then + JAVA_HEAP_MAX="-Xmx""$YARN_PROXYSERVER_HEAPSIZE""m" + fi +elif [ "$COMMAND" = "version" ] ; then + CLASS=org.apache.hadoop.util.VersionInfo + YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS" +elif [ "$COMMAND" = "jar" ] ; then + 
CLASS=org.apache.hadoop.util.RunJar + YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS" +elif [ "$COMMAND" = "logs" ] ; then + CLASS=org.apache.hadoop.yarn.client.cli.LogsCLI + YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS" +elif [ "$COMMAND" = "daemonlog" ] ; then + CLASS=org.apache.hadoop.log.LogLevel + YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS" +else + CLASS=$COMMAND +fi + +YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR" +YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR" +YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE" +YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE" +YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$HADOOP_YARN_HOME" +YARN_OPTS="$YARN_OPTS -Dhadoop.home.dir=$HADOOP_YARN_HOME" +YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" +YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" +if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then + YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH" +fi + +exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $YARN_OPTS -classpath "$CLASSPATH" $CLASS "$@" +fi diff --git a/aarch64/bin/yarn.cmd b/aarch64/bin/yarn.cmd new file mode 100755 index 0000000..955df46 --- /dev/null +++ b/aarch64/bin/yarn.cmd @@ -0,0 +1,254 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +@rem The Hadoop command script +@rem +@rem Environment Variables +@rem +@rem JAVA_HOME The java implementation to use. Overrides JAVA_HOME. +@rem +@rem YARN_CLASSPATH Extra Java CLASSPATH entries. +@rem +@rem YARN_HEAPSIZE The maximum amount of heap to use, in MB. +@rem Default is 1000. +@rem +@rem YARN_{COMMAND}_HEAPSIZE overrides YARN_HEAPSIZE for a given command +@rem eg YARN_NODEMANAGER_HEAPSIZE sets the heap +@rem size for the NodeManager. If you set the +@rem heap size in YARN_{COMMAND}_OPTS or YARN_OPTS +@rem they take precedence. +@rem +@rem YARN_OPTS Extra Java runtime options. +@rem +@rem YARN_CLIENT_OPTS when the respective command is run. +@rem YARN_{COMMAND}_OPTS etc YARN_NODEMANAGER_OPTS applies to NodeManager +@rem for e.g. YARN_CLIENT_OPTS applies to +@rem more than one command (fs, dfs, fsck, +@rem dfsadmin etc) +@rem +@rem YARN_CONF_DIR Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf. +@rem +@rem YARN_ROOT_LOGGER The root appender. 
Default is INFO,console +@rem + +setlocal enabledelayedexpansion + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +call %DEFAULT_LIBEXEC_DIR%\yarn-config.cmd %* +if "%1" == "--config" ( + shift + shift +) + +:main + if exist %YARN_CONF_DIR%\yarn-env.cmd ( + call %YARN_CONF_DIR%\yarn-env.cmd + ) + + set yarn-command=%1 + call :make_command_arguments %* + + if not defined yarn-command ( + goto print_usage + ) + + @rem JAVA and JAVA_HEAP_MAX and set in hadoop-config.cmd + + if defined YARN_HEAPSIZE ( + @rem echo run with Java heapsize %YARN_HEAPSIZE% + set JAVA_HEAP_MAX=-Xmx%YARN_HEAPSIZE%m + ) + + @rem CLASSPATH initially contains HADOOP_CONF_DIR & YARN_CONF_DIR + if not defined HADOOP_CONF_DIR ( + echo No HADOOP_CONF_DIR set. + echo Please specify it either in yarn-env.cmd or in the environment. + goto :eof + ) + + set CLASSPATH=%HADOOP_CONF_DIR%;%YARN_CONF_DIR%;%CLASSPATH% + + @rem for developers, add Hadoop classes to CLASSPATH + if exist %HADOOP_YARN_HOME%\yarn-api\target\classes ( + set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\yarn-api\target\classes + ) + + if exist %HADOOP_YARN_HOME%\yarn-common\target\classes ( + set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\yarn-common\target\classes + ) + + if exist %HADOOP_YARN_HOME%\yarn-mapreduce\target\classes ( + set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\yarn-mapreduce\target\classes + ) + + if exist %HADOOP_YARN_HOME%\yarn-master-worker\target\classes ( + set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\yarn-master-worker\target\classes + ) + + if exist %HADOOP_YARN_HOME%\yarn-server\yarn-server-nodemanager\target\classes ( + set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\yarn-server\yarn-server-nodemanager\target\classes + ) + + if exist %HADOOP_YARN_HOME%\yarn-server\yarn-server-common\target\classes ( + set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\yarn-server\yarn-server-common\target\classes + ) + + if exist %HADOOP_YARN_HOME%\yarn-server\yarn-server-resourcemanager\target\classes ( + set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\yarn-server\yarn-server-resourcemanager\target\classes + ) + + if exist %HADOOP_YARN_HOME%\build\test\classes ( + set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\build\test\classes + ) + + if exist %HADOOP_YARN_HOME%\build\tools ( + set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\build\tools + ) + + set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\%YARN_DIR%\* + set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\%YARN_LIB_JARS_DIR%\* + + call :%yarn-command% %yarn-command-arguments% + + if defined JAVA_LIBRARY_PATH ( + set YARN_OPTS=%YARN_OPTS% -Djava.library.path=%JAVA_LIBRARY_PATH% + ) + + set java_arguments=%JAVA_HEAP_MAX% %YARN_OPTS% -classpath %CLASSPATH% %CLASS% %yarn-command-arguments% + call %JAVA% %java_arguments% + +goto :eof + +:classpath + @echo %CLASSPATH% + goto :eof + +:rmadmin + set CLASS=org.apache.hadoop.yarn.server.resourcemanager.tools.RMAdmin + set YARN_OPTS=%YARN_OPTS% %YARN_CLIENT_OPTS% + goto :eof + +:application + set CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI + set YARN_OPTS=%YARN_OPTS% %YARN_CLIENT_OPTS% + goto :eof + +:node + set CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI + set YARN_OPTS=%YARN_OPTS% %YARN_CLIENT_OPTS% + goto :eof + +:resourcemanager + set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%\rm-config\log4j.properties + set 
CLASS=org.apache.hadoop.yarn.server.resourcemanager.ResourceManager + set YARN_OPTS=%YARN_OPTS% %HADOOP_RESOURCEMANAGER_OPTS% + if defined YARN_RESOURCEMANAGER_HEAPSIZE ( + set JAVA_HEAP_MAX=-Xmx%YARN_RESOURCEMANAGER_HEAPSIZE%m + ) + goto :eof + +:nodemanager + set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%\nm-config\log4j.properties + set CLASS=org.apache.hadoop.yarn.server.nodemanager.NodeManager + set YARN_OPTS=%YARN_OPTS% -server %HADOOP_NODEMANAGER_OPTS% + if defined YARN_NODEMANAGER_HEAPSIZE ( + set JAVA_HEAP_MAX=-Xmx%YARN_NODEMANAGER_HEAPSIZE%m + ) + goto :eof + +:proxyserver + set CLASS=org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer + set YARN_OPTS=%YARN_OPTS% %HADOOP_PROXYSERVER_OPTS% + if defined YARN_PROXYSERVER_HEAPSIZE ( + set JAVA_HEAP_MAX=-Xmx%YARN_PROXYSERVER_HEAPSIZE%m + ) + goto :eof + +:version + set CLASS=org.apache.hadoop.util.VersionInfo + set YARN_OPTS=%YARN_OPTS% %YARN_CLIENT_OPTS% + goto :eof + +:jar + set CLASS=org.apache.hadoop.util.RunJar + set YARN_OPTS=%YARN_OPTS% %YARN_CLIENT_OPTS% + goto :eof + +:logs + set CLASS=org.apache.hadoop.yarn.logaggregation.LogDumper + set YARN_OPTS=%YARN_OPTS% %YARN_CLIENT_OPTS% + goto :eof + +:daemonlog + set CLASS=org.apache.hadoop.log.LogLevel + set YARN_OPTS=%YARN_OPTS% %YARN_CLIENT_OPTS% + goto :eof + +@rem This changes %1, %2 etc. Hence those cannot be used after calling this. +:make_command_arguments + if "%1" == "--config" ( + shift + shift + ) + if [%2] == [] goto :eof + shift + set _yarnarguments= + :MakeCmdArgsLoop + if [%1]==[] goto :EndLoop + + if not defined _yarnarguments ( + set _yarnarguments=%1 + ) else ( + set _yarnarguments=!_yarnarguments! %1 + ) + shift + goto :MakeCmdArgsLoop + :EndLoop + set yarn-command-arguments=%_yarnarguments% + goto :eof + +:print_usage + @echo Usage: yarn [--config confdir] COMMAND + @echo where COMMAND is one of: + @echo resourcemanager run the ResourceManager + @echo nodemanager run a nodemanager on each slave + @echo historyserver run job history servers as a standalone daemon + @echo rmadmin admin tools + @echo version print the version + @echo jar ^ run a jar file + @echo application prints application(s) report/kill application + @echo node prints node report(s) + @echo logs dump container logs + @echo classpath prints the class path needed to get the + @echo Hadoop jar and the required libraries + @echo daemonlog get/set the log level for each daemon + @echo or + @echo CLASSNAME run the class named CLASSNAME + @echo Most commands print help when invoked w/o parameters. + +endlocal diff --git a/aarch64/etc/hadoop/capacity-scheduler.xml b/aarch64/etc/hadoop/capacity-scheduler.xml new file mode 100644 index 0000000..80a9fec --- /dev/null +++ b/aarch64/etc/hadoop/capacity-scheduler.xml @@ -0,0 +1,111 @@ + + + + + yarn.scheduler.capacity.maximum-applications + 10000 + + Maximum number of applications that can be pending and running. + + + + + yarn.scheduler.capacity.maximum-am-resource-percent + 0.1 + + Maximum percent of resources in the cluster which can be used to run + application masters i.e. controls number of concurrent running + applications. + + + + + yarn.scheduler.capacity.resource-calculator + org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator + + The ResourceCalculator implementation to be used to compare + Resources in the scheduler. + The default i.e. DefaultResourceCalculator only uses Memory while + DominantResourceCalculator uses dominant-resource to compare + multi-dimensional resources such as Memory, CPU etc. 
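+    <!-- Illustrative alternative (a sketch, not part of the stock value above):
+         to compare memory and CPU together, the property could instead be set to
+         org.apache.hadoop.yarn.util.resource.DominantResourceCalculator. -->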
+ + + + + yarn.scheduler.capacity.root.queues + default + + The queues at the this level (root is the root queue). + + + + + yarn.scheduler.capacity.root.default.capacity + 100 + Default queue target capacity. + + + + yarn.scheduler.capacity.root.default.user-limit-factor + 1 + + Default queue user limit a percentage from 0.0 to 1.0. + + + + + yarn.scheduler.capacity.root.default.maximum-capacity + 100 + + The maximum capacity of the default queue. + + + + + yarn.scheduler.capacity.root.default.state + RUNNING + + The state of the default queue. State can be one of RUNNING or STOPPED. + + + + + yarn.scheduler.capacity.root.default.acl_submit_applications + * + + The ACL of who can submit jobs to the default queue. + + + + + yarn.scheduler.capacity.root.default.acl_administer_queue + * + + The ACL of who can administer jobs on the default queue. + + + + + yarn.scheduler.capacity.node-locality-delay + -1 + + Number of missed scheduling opportunities after which the CapacityScheduler + attempts to schedule rack-local containers. + Typically this should be set to number of racks in the cluster, this + feature is disabled by default, set to -1. + + + + diff --git a/aarch64/etc/hadoop/configuration.xsl b/aarch64/etc/hadoop/configuration.xsl new file mode 100644 index 0000000..d50d80b --- /dev/null +++ b/aarch64/etc/hadoop/configuration.xsl @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + +
+<tr>
+ <th>name</th>
+ <th>value</th>
+ <th>description</th>
+</tr>
+<xsl:for-each select="property">
+<tr>
+  <td><a name="{name}"><xsl:value-of select="name"/></a></td>
+  <td><xsl:value-of select="value"/></td>
+  <td><xsl:value-of select="description"/></td>
+</tr>
+</xsl:for-each>
+</table>
+</body>
+</html>
+</xsl:template>
+</xsl:stylesheet>
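The stylesheet above simply renders a Hadoop configuration file as an HTML table with
name, value and description columns. A minimal sketch of using it offline, assuming
xsltproc is installed and the command is run from this etc/hadoop directory (the output
filename is arbitrary):

    xsltproc configuration.xsl core-site.xml > core-site.html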
diff --git a/aarch64/etc/hadoop/container-executor.cfg b/aarch64/etc/hadoop/container-executor.cfg new file mode 100644 index 0000000..d68cee8 --- /dev/null +++ b/aarch64/etc/hadoop/container-executor.cfg @@ -0,0 +1,4 @@ +yarn.nodemanager.linux-container-executor.group=#configured value of yarn.nodemanager.linux-container-executor.group +banned.users=#comma separated list of users who can not run applications +min.user.id=1000#Prevent other super-users +allowed.system.users=##comma separated list of system users who CAN run applications diff --git a/aarch64/etc/hadoop/core-site.xml b/aarch64/etc/hadoop/core-site.xml new file mode 100644 index 0000000..d2ddf89 --- /dev/null +++ b/aarch64/etc/hadoop/core-site.xml @@ -0,0 +1,20 @@ + + + + + + + + diff --git a/aarch64/etc/hadoop/hadoop-env.cmd b/aarch64/etc/hadoop/hadoop-env.cmd new file mode 100644 index 0000000..05badc2 --- /dev/null +++ b/aarch64/etc/hadoop/hadoop-env.cmd @@ -0,0 +1,81 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +@rem Set Hadoop-specific environment variables here. + +@rem The only required environment variable is JAVA_HOME. All others are +@rem optional. When running a distributed configuration it is best to +@rem set JAVA_HOME in this file, so that it is correctly defined on +@rem remote nodes. + +@rem The java implementation to use. Required. +set JAVA_HOME=%JAVA_HOME% + +@rem The jsvc implementation to use. Jsvc is required to run secure datanodes. +@rem set JSVC_HOME=%JSVC_HOME% + +@rem set HADOOP_CONF_DIR= + +@rem Extra Java CLASSPATH elements. Automatically insert capacity-scheduler. +if exist %HADOOP_HOME%\contrib\capacity-scheduler ( + if not defined HADOOP_CLASSPATH ( + set HADOOP_CLASSPATH=%HADOOP_HOME%\contrib\capacity-scheduler\*.jar + ) else ( + set HADOOP_CLASSPATH=%HADOOP_CLASSPATH%;%HADOOP_HOME%\contrib\capacity-scheduler\*.jar + ) +) + +@rem The maximum amount of heap to use, in MB. Default is 1000. +@rem set HADOOP_HEAPSIZE= +@rem set HADOOP_NAMENODE_INIT_HEAPSIZE="" + +@rem Extra Java runtime options. Empty by default. 
+@rem set HADOOP_OPTS=%HADOOP_OPTS% -Djava.net.preferIPv4Stack=true + +@rem Command specific options appended to HADOOP_OPTS when specified +if not defined HADOOP_SECURITY_LOGGER ( + set HADOOP_SECURITY_LOGGER=INFO,RFAS +) +if not defined HDFS_AUDIT_LOGGER ( + set HDFS_AUDIT_LOGGER=INFO,NullAppender +) + +set HADOOP_NAMENODE_OPTS=-Dhadoop.security.logger=%HADOOP_SECURITY_LOGGER% -Dhdfs.audit.logger=%HDFS_AUDIT_LOGGER% %HADOOP_NAMENODE_OPTS% +set HADOOP_DATANODE_OPTS=-Dhadoop.security.logger=ERROR,RFAS %HADOOP_DATANODE_OPTS% +set HADOOP_SECONDARYNAMENODE_OPTS=-Dhadoop.security.logger=%HADOOP_SECURITY_LOGGER% -Dhdfs.audit.logger=%HDFS_AUDIT_LOGGER% %HADOOP_SECONDARYNAMENODE_OPTS% + +@rem The following applies to multiple commands (fs, dfs, fsck, distcp etc) +set HADOOP_CLIENT_OPTS=-Xmx128m %HADOOP_CLIENT_OPTS% +@rem set HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData %HADOOP_JAVA_PLATFORM_OPTS%" + +@rem On secure datanodes, user to run the datanode as after dropping privileges +set HADOOP_SECURE_DN_USER=%HADOOP_SECURE_DN_USER% + +@rem Where log files are stored. %HADOOP_HOME%/logs by default. +@rem set HADOOP_LOG_DIR=%HADOOP_LOG_DIR%\%USERNAME% + +@rem Where log files are stored in the secure data environment. +set HADOOP_SECURE_DN_LOG_DIR=%HADOOP_LOG_DIR%\%HADOOP_HDFS_USER% + +@rem The directory where pid files are stored. /tmp by default. +@rem NOTE: this should be set to a directory that can only be written to by +@rem the user that will run the hadoop daemons. Otherwise there is the +@rem potential for a symlink attack. +set HADOOP_PID_DIR=%HADOOP_PID_DIR% +set HADOOP_SECURE_DN_PID_DIR=%HADOOP_PID_DIR% + +@rem A string representing this instance of hadoop. %USERNAME% by default. +set HADOOP_IDENT_STRING=%USERNAME% diff --git a/aarch64/etc/hadoop/hadoop-env.sh b/aarch64/etc/hadoop/hadoop-env.sh new file mode 100644 index 0000000..5836a8a --- /dev/null +++ b/aarch64/etc/hadoop/hadoop-env.sh @@ -0,0 +1,77 @@ +# Copyright 2011 The Apache Software Foundation +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Set Hadoop-specific environment variables here. + +# The only required environment variable is JAVA_HOME. All others are +# optional. When running a distributed configuration it is best to +# set JAVA_HOME in this file, so that it is correctly defined on +# remote nodes. + +# The java implementation to use. +export JAVA_HOME=${JAVA_HOME} + +# The jsvc implementation to use. Jsvc is required to run secure datanodes. +#export JSVC_HOME=${JSVC_HOME} + +export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"} + +# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler. 
+for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do + if [ "$HADOOP_CLASSPATH" ]; then + export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f + else + export HADOOP_CLASSPATH=$f + fi +done + +# The maximum amount of heap to use, in MB. Default is 1000. +#export HADOOP_HEAPSIZE= +#export HADOOP_NAMENODE_INIT_HEAPSIZE="" + +# Extra Java runtime options. Empty by default. +export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true" + +# Command specific options appended to HADOOP_OPTS when specified +export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS" +export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS" + +export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS" + +# The following applies to multiple commands (fs, dfs, fsck, distcp etc) +export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS" +#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS" + +# On secure datanodes, user to run the datanode as after dropping privileges +export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER} + +# Where log files are stored. $HADOOP_HOME/logs by default. +#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER + +# Where log files are stored in the secure data environment. +export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER} + +# The directory where pid files are stored. /tmp by default. +# NOTE: this should be set to a directory that can only be written to by +# the user that will run the hadoop daemons. Otherwise there is the +# potential for a symlink attack. +export HADOOP_PID_DIR=${HADOOP_PID_DIR} +export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR} + +# A string representing this instance of hadoop. $USER by default. 
+export HADOOP_IDENT_STRING=$USER diff --git a/aarch64/etc/hadoop/hadoop-metrics.properties b/aarch64/etc/hadoop/hadoop-metrics.properties new file mode 100644 index 0000000..c1b2eb7 --- /dev/null +++ b/aarch64/etc/hadoop/hadoop-metrics.properties @@ -0,0 +1,75 @@ +# Configuration of the "dfs" context for null +dfs.class=org.apache.hadoop.metrics.spi.NullContext + +# Configuration of the "dfs" context for file +#dfs.class=org.apache.hadoop.metrics.file.FileContext +#dfs.period=10 +#dfs.fileName=/tmp/dfsmetrics.log + +# Configuration of the "dfs" context for ganglia +# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter) +# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext +# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 +# dfs.period=10 +# dfs.servers=localhost:8649 + + +# Configuration of the "mapred" context for null +mapred.class=org.apache.hadoop.metrics.spi.NullContext + +# Configuration of the "mapred" context for file +#mapred.class=org.apache.hadoop.metrics.file.FileContext +#mapred.period=10 +#mapred.fileName=/tmp/mrmetrics.log + +# Configuration of the "mapred" context for ganglia +# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter) +# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext +# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 +# mapred.period=10 +# mapred.servers=localhost:8649 + + +# Configuration of the "jvm" context for null +#jvm.class=org.apache.hadoop.metrics.spi.NullContext + +# Configuration of the "jvm" context for file +#jvm.class=org.apache.hadoop.metrics.file.FileContext +#jvm.period=10 +#jvm.fileName=/tmp/jvmmetrics.log + +# Configuration of the "jvm" context for ganglia +# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext +# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 +# jvm.period=10 +# jvm.servers=localhost:8649 + +# Configuration of the "rpc" context for null +rpc.class=org.apache.hadoop.metrics.spi.NullContext + +# Configuration of the "rpc" context for file +#rpc.class=org.apache.hadoop.metrics.file.FileContext +#rpc.period=10 +#rpc.fileName=/tmp/rpcmetrics.log + +# Configuration of the "rpc" context for ganglia +# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext +# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 +# rpc.period=10 +# rpc.servers=localhost:8649 + + +# Configuration of the "ugi" context for null +ugi.class=org.apache.hadoop.metrics.spi.NullContext + +# Configuration of the "ugi" context for file +#ugi.class=org.apache.hadoop.metrics.file.FileContext +#ugi.period=10 +#ugi.fileName=/tmp/ugimetrics.log + +# Configuration of the "ugi" context for ganglia +# ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext +# ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 +# ugi.period=10 +# ugi.servers=localhost:8649 + diff --git a/aarch64/etc/hadoop/hadoop-metrics2.properties b/aarch64/etc/hadoop/hadoop-metrics2.properties new file mode 100644 index 0000000..c3ffe31 --- /dev/null +++ b/aarch64/etc/hadoop/hadoop-metrics2.properties @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# syntax: [prefix].[source|sink].[instance].[options] +# See javadoc of package-info.java for org.apache.hadoop.metrics2 for details + +*.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink +# default sampling period, in seconds +*.period=10 + +# The namenode-metrics.out will contain metrics from all context +#namenode.sink.file.filename=namenode-metrics.out +# Specifying a special sampling period for namenode: +#namenode.sink.*.period=8 + +#datanode.sink.file.filename=datanode-metrics.out + +# the following example split metrics of different +# context to different sinks (in this case files) +#jobtracker.sink.file_jvm.context=jvm +#jobtracker.sink.file_jvm.filename=jobtracker-jvm-metrics.out +#jobtracker.sink.file_mapred.context=mapred +#jobtracker.sink.file_mapred.filename=jobtracker-mapred-metrics.out + +#tasktracker.sink.file.filename=tasktracker-metrics.out + +#maptask.sink.file.filename=maptask-metrics.out + +#reducetask.sink.file.filename=reducetask-metrics.out + diff --git a/aarch64/etc/hadoop/hadoop-policy.xml b/aarch64/etc/hadoop/hadoop-policy.xml new file mode 100644 index 0000000..491dbe7 --- /dev/null +++ b/aarch64/etc/hadoop/hadoop-policy.xml @@ -0,0 +1,219 @@ + + + + + + + + + security.client.protocol.acl + * + ACL for ClientProtocol, which is used by user code + via the DistributedFileSystem. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.client.datanode.protocol.acl + * + ACL for ClientDatanodeProtocol, the client-to-datanode protocol + for block recovery. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.datanode.protocol.acl + * + ACL for DatanodeProtocol, which is used by datanodes to + communicate with the namenode. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.inter.datanode.protocol.acl + * + ACL for InterDatanodeProtocol, the inter-datanode protocol + for updating generation timestamp. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.namenode.protocol.acl + * + ACL for NamenodeProtocol, the protocol used by the secondary + namenode to communicate with the namenode. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.admin.operations.protocol.acl + * + ACL for AdminOperationsProtocol. Used for admin commands. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. 
For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.refresh.usertogroups.mappings.protocol.acl + * + ACL for RefreshUserMappingsProtocol. Used to refresh + users mappings. The ACL is a comma-separated list of user and + group names. The user and group list is separated by a blank. For + e.g. "alice,bob users,wheel". A special value of "*" means all + users are allowed. + + + + security.refresh.policy.protocol.acl + * + ACL for RefreshAuthorizationPolicyProtocol, used by the + dfsadmin and mradmin commands to refresh the security policy in-effect. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.ha.service.protocol.acl + * + ACL for HAService protocol used by HAAdmin to manage the + active and stand-by states of namenode. + + + + security.zkfc.protocol.acl + * + ACL for access to the ZK Failover Controller + + + + + security.qjournal.service.protocol.acl + * + ACL for QJournalProtocol, used by the NN to communicate with + JNs when using the QuorumJournalManager for edit logs. + + + + security.mrhs.client.protocol.acl + * + ACL for HSClientProtocol, used by job clients to + communciate with the MR History Server job status etc. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + + + security.resourcetracker.protocol.acl + * + ACL for ResourceTrackerProtocol, used by the + ResourceManager and NodeManager to communicate with each other. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.resourcemanager-administration.protocol.acl + * + ACL for ResourceManagerAdministrationProtocol, for admin commands. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.applicationclient.protocol.acl + * + ACL for ApplicationClientProtocol, used by the ResourceManager + and applications submission clients to communicate with each other. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.applicationmaster.protocol.acl + * + ACL for ApplicationMasterProtocol, used by the ResourceManager + and ApplicationMasters to communicate with each other. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.containermanagement.protocol.acl + * + ACL for ContainerManagementProtocol protocol, used by the NodeManager + and ApplicationMasters to communicate with each other. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. 
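+    <!-- Illustrative value only (the users and groups are hypothetical): an ACL of
+         "alice,bob mapreduce,hadoop" admits users alice and bob plus members of the
+         mapreduce and hadoop groups, following the format described above. -->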
+ + + + security.resourcelocalizer.protocol.acl + * + ACL for ResourceLocalizer protocol, used by the NodeManager + and ResourceLocalizer to communicate with each other. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.job.task.protocol.acl + * + ACL for TaskUmbilicalProtocol, used by the map and reduce + tasks to communicate with the parent tasktracker. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.job.client.protocol.acl + * + ACL for MRClientProtocol, used by job clients to + communciate with the MR ApplicationMaster to query job status etc. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + diff --git a/aarch64/etc/hadoop/hdfs-site.xml b/aarch64/etc/hadoop/hdfs-site.xml new file mode 100644 index 0000000..50ec146 --- /dev/null +++ b/aarch64/etc/hadoop/hdfs-site.xml @@ -0,0 +1,21 @@ + + + + + + + + + diff --git a/aarch64/etc/hadoop/httpfs-env.sh b/aarch64/etc/hadoop/httpfs-env.sh new file mode 100644 index 0000000..84c67b7 --- /dev/null +++ b/aarch64/etc/hadoop/httpfs-env.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. See accompanying LICENSE file. +# + +# Set httpfs specific environment variables here. + +# Settings for the Embedded Tomcat that runs HttpFS +# Java System properties for HttpFS should be specified in this variable +# +# export CATALINA_OPTS= + +# HttpFS logs directory +# +# export HTTPFS_LOG=${HTTPFS_HOME}/logs + +# HttpFS temporary directory +# +# export HTTPFS_TEMP=${HTTPFS_HOME}/temp + +# The HTTP port used by HttpFS +# +# export HTTPFS_HTTP_PORT=14000 + +# The Admin port used by HttpFS +# +# export HTTPFS_ADMIN_PORT=`expr ${HTTPFS_HTTP_PORT} + 1` + +# The hostname HttpFS server runs on +# +# export HTTPFS_HTTP_HOSTNAME=`hostname -f` diff --git a/aarch64/etc/hadoop/httpfs-log4j.properties b/aarch64/etc/hadoop/httpfs-log4j.properties new file mode 100644 index 0000000..284a819 --- /dev/null +++ b/aarch64/etc/hadoop/httpfs-log4j.properties @@ -0,0 +1,35 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. See accompanying LICENSE file. 
+# + +# If the Java System property 'httpfs.log.dir' is not defined at HttpFSServer start up time +# Setup sets its value to '${httpfs.home}/logs' + +log4j.appender.httpfs=org.apache.log4j.DailyRollingFileAppender +log4j.appender.httpfs.DatePattern='.'yyyy-MM-dd +log4j.appender.httpfs.File=${httpfs.log.dir}/httpfs.log +log4j.appender.httpfs.Append=true +log4j.appender.httpfs.layout=org.apache.log4j.PatternLayout +log4j.appender.httpfs.layout.ConversionPattern=%d{ISO8601} %5p %c{1} [%X{hostname}][%X{user}:%X{doAs}] %X{op} %m%n + +log4j.appender.httpfsaudit=org.apache.log4j.DailyRollingFileAppender +log4j.appender.httpfsaudit.DatePattern='.'yyyy-MM-dd +log4j.appender.httpfsaudit.File=${httpfs.log.dir}/httpfs-audit.log +log4j.appender.httpfsaudit.Append=true +log4j.appender.httpfsaudit.layout=org.apache.log4j.PatternLayout +log4j.appender.httpfsaudit.layout.ConversionPattern=%d{ISO8601} %5p [%X{hostname}][%X{user}:%X{doAs}] %X{op} %m%n + +log4j.logger.httpfsaudit=INFO, httpfsaudit + +log4j.logger.org.apache.hadoop.fs.http.server=INFO, httpfs +log4j.logger.org.apache.hadoop.lib=INFO, httpfs diff --git a/aarch64/etc/hadoop/httpfs-signature.secret b/aarch64/etc/hadoop/httpfs-signature.secret new file mode 100644 index 0000000..56466e9 --- /dev/null +++ b/aarch64/etc/hadoop/httpfs-signature.secret @@ -0,0 +1 @@ +hadoop httpfs secret diff --git a/aarch64/etc/hadoop/httpfs-site.xml b/aarch64/etc/hadoop/httpfs-site.xml new file mode 100644 index 0000000..4a718e1 --- /dev/null +++ b/aarch64/etc/hadoop/httpfs-site.xml @@ -0,0 +1,17 @@ + + + + + diff --git a/aarch64/etc/hadoop/log4j.properties b/aarch64/etc/hadoop/log4j.properties new file mode 100644 index 0000000..7e0834a --- /dev/null +++ b/aarch64/etc/hadoop/log4j.properties @@ -0,0 +1,231 @@ +# Copyright 2011 The Apache Software Foundation +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Define some default values that can be overridden by system properties +hadoop.root.logger=INFO,console +hadoop.log.dir=. +hadoop.log.file=hadoop.log + +# Define the root logger to the system property "hadoop.root.logger". +log4j.rootLogger=${hadoop.root.logger}, EventCounter + +# Logging Threshold +log4j.threshold=ALL + +# Null Appender +log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender + +# +# Rolling File Appender - cap space usage at 5gb. 
+# +hadoop.log.maxfilesize=256MB +hadoop.log.maxbackupindex=20 +log4j.appender.RFA=org.apache.log4j.RollingFileAppender +log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file} + +log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize} +log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex} + +log4j.appender.RFA.layout=org.apache.log4j.PatternLayout + +# Pattern format: Date LogLevel LoggerName LogMessage +log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +# Debugging Pattern format +#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + +# +# Daily Rolling File Appender +# + +log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender +log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} + +# Rollver at midnight +log4j.appender.DRFA.DatePattern=.yyyy-MM-dd + +# 30-day backup +#log4j.appender.DRFA.MaxBackupIndex=30 +log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout + +# Pattern format: Date LogLevel LoggerName LogMessage +log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +# Debugging Pattern format +#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + +# +# console +# Add "console" to rootlogger above if you want to use this +# + +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n + +# +# TaskLog Appender +# + +#Default values +hadoop.tasklog.taskid=null +hadoop.tasklog.iscleanup=false +hadoop.tasklog.noKeepSplits=4 +hadoop.tasklog.totalLogFileSize=100 +hadoop.tasklog.purgeLogSplits=true +hadoop.tasklog.logsRetainHours=12 + +log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender +log4j.appender.TLA.taskId=${hadoop.tasklog.taskid} +log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup} +log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize} + +log4j.appender.TLA.layout=org.apache.log4j.PatternLayout +log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n + +# +# HDFS block state change log from block manager +# +# Uncomment the following to suppress normal block state change +# messages from BlockManager in NameNode. +#log4j.logger.BlockStateChange=WARN + +# +#Security appender +# +hadoop.security.logger=INFO,NullAppender +hadoop.security.log.maxfilesize=256MB +hadoop.security.log.maxbackupindex=20 +log4j.category.SecurityLogger=${hadoop.security.logger} +hadoop.security.log.file=SecurityAuth-${user.name}.audit +log4j.appender.RFAS=org.apache.log4j.RollingFileAppender +log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} +log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout +log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize} +log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex} + +# +# Daily Rolling Security appender +# +log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender +log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} +log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout +log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd + +# +# hadoop configuration logging +# + +# Uncomment the following line to turn off configuration deprecation warnings. 
+# log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN + +# +# hdfs audit logging +# +hdfs.audit.logger=INFO,NullAppender +hdfs.audit.log.maxfilesize=256MB +hdfs.audit.log.maxbackupindex=20 +log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger} +log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false +log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender +log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log +log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout +log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n +log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize} +log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex} + +# +# mapred audit logging +# +mapred.audit.logger=INFO,NullAppender +mapred.audit.log.maxfilesize=256MB +mapred.audit.log.maxbackupindex=20 +log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger} +log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false +log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender +log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log +log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout +log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n +log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize} +log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex} + +# Custom Logging levels + +#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG +#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG +#log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG + +# Jets3t library +log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR + +# +# Event Counter Appender +# Sends counts of logging messages at different severity levels to Hadoop Metrics. 
+# +log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter + +# +# Job Summary Appender +# +# Use following logger to send summary to separate file defined by +# hadoop.mapreduce.jobsummary.log.file : +# hadoop.mapreduce.jobsummary.logger=INFO,JSA +# +hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger} +hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log +hadoop.mapreduce.jobsummary.log.maxfilesize=256MB +hadoop.mapreduce.jobsummary.log.maxbackupindex=20 +log4j.appender.JSA=org.apache.log4j.RollingFileAppender +log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file} +log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize} +log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex} +log4j.appender.JSA.layout=org.apache.log4j.PatternLayout +log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n +log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger} +log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false + +# +# Yarn ResourceManager Application Summary Log +# +# Set the ResourceManager summary log filename +yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log +# Set the ResourceManager summary log level and appender +yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger} +#yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY + +# To enable AppSummaryLogging for the RM, +# set yarn.server.resourcemanager.appsummary.logger to +# ,RMSUMMARY in hadoop-env.sh + +# Appender for ResourceManager Application Summary Log +# Requires the following properties to be set +# - hadoop.log.dir (Hadoop Log directory) +# - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename) +# - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender) + +log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger} +log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false +log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender +log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file} +log4j.appender.RMSUMMARY.MaxFileSize=256MB +log4j.appender.RMSUMMARY.MaxBackupIndex=20 +log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout +log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n diff --git a/aarch64/etc/hadoop/mapred-env.cmd b/aarch64/etc/hadoop/mapred-env.cmd new file mode 100644 index 0000000..610d593 --- /dev/null +++ b/aarch64/etc/hadoop/mapred-env.cmd @@ -0,0 +1,20 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +set HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000 + +set HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA + diff --git a/aarch64/etc/hadoop/mapred-env.sh b/aarch64/etc/hadoop/mapred-env.sh new file mode 100644 index 0000000..6be1e27 --- /dev/null +++ b/aarch64/etc/hadoop/mapred-env.sh @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# export JAVA_HOME=/home/y/libexec/jdk1.6.0/ + +export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000 + +export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA + +#export HADOOP_JOB_HISTORYSERVER_OPTS= +#export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored. $HADOOP_MAPRED_HOME/logs by default. +#export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger. +#export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default. +#export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default +#export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0. diff --git a/aarch64/etc/hadoop/mapred-queues.xml.template b/aarch64/etc/hadoop/mapred-queues.xml.template new file mode 100644 index 0000000..ce6cd20 --- /dev/null +++ b/aarch64/etc/hadoop/mapred-queues.xml.template @@ -0,0 +1,92 @@ + + + + + + + + + + default + + + + + + + running + + + + + + + + + + diff --git a/aarch64/etc/hadoop/mapred-site.xml.template b/aarch64/etc/hadoop/mapred-site.xml.template new file mode 100644 index 0000000..761c352 --- /dev/null +++ b/aarch64/etc/hadoop/mapred-site.xml.template @@ -0,0 +1,21 @@ + + + + + + + + + diff --git a/aarch64/etc/hadoop/slaves b/aarch64/etc/hadoop/slaves new file mode 100644 index 0000000..2fbb50c --- /dev/null +++ b/aarch64/etc/hadoop/slaves @@ -0,0 +1 @@ +localhost diff --git a/aarch64/etc/hadoop/ssl-client.xml.example b/aarch64/etc/hadoop/ssl-client.xml.example new file mode 100644 index 0000000..a50dce4 --- /dev/null +++ b/aarch64/etc/hadoop/ssl-client.xml.example @@ -0,0 +1,80 @@ + + + + + + + ssl.client.truststore.location + + Truststore to be used by clients like distcp. Must be + specified. + + + + + ssl.client.truststore.password + + Optional. Default value is "". + + + + + ssl.client.truststore.type + jks + Optional. The keystore file format, default value is "jks". + + + + + ssl.client.truststore.reload.interval + 10000 + Truststore reload check interval, in milliseconds. + Default value is 10000 (10 seconds). + + + + + ssl.client.keystore.location + + Keystore to be used by clients like distcp. Must be + specified. + + + + + ssl.client.keystore.password + + Optional. Default value is "". + + + + + ssl.client.keystore.keypassword + + Optional. Default value is "". + + + + + ssl.client.keystore.type + jks + Optional. 
The keystore file format, default value is "jks". + + + + diff --git a/aarch64/etc/hadoop/ssl-server.xml.example b/aarch64/etc/hadoop/ssl-server.xml.example new file mode 100644 index 0000000..4b363ff --- /dev/null +++ b/aarch64/etc/hadoop/ssl-server.xml.example @@ -0,0 +1,77 @@ + + + + + + + ssl.server.truststore.location + + Truststore to be used by NN and DN. Must be specified. + + + + + ssl.server.truststore.password + + Optional. Default value is "". + + + + + ssl.server.truststore.type + jks + Optional. The keystore file format, default value is "jks". + + + + + ssl.server.truststore.reload.interval + 10000 + Truststore reload check interval, in milliseconds. + Default value is 10000 (10 seconds). + + + + ssl.server.keystore.location + + Keystore to be used by NN and DN. Must be specified. + + + + + ssl.server.keystore.password + + Must be specified. + + + + + ssl.server.keystore.keypassword + + Must be specified. + + + + + ssl.server.keystore.type + jks + Optional. The keystore file format, default value is "jks". + + + + diff --git a/aarch64/etc/hadoop/yarn-env.cmd b/aarch64/etc/hadoop/yarn-env.cmd new file mode 100644 index 0000000..3329f8f --- /dev/null +++ b/aarch64/etc/hadoop/yarn-env.cmd @@ -0,0 +1,60 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
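+
+@rem (Aside: the logic below derives JAVA_HEAP_MAX, the log directory and the
+@rem root logger from the variables checked here.  A hedged sketch of
+@rem site-specific overrides -- the values are placeholders, not defaults:
+@rem
+@rem   set YARN_HEAPSIZE=2000              becomes -Xmx2000m below
+@rem   set YARN_LOG_DIR=D:\hadoop\logs
+@rem   set YARN_ROOT_LOGGER=DEBUG,console
+@rem )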
+ +@rem User for YARN daemons +if not defined HADOOP_YARN_USER ( + set HADOOP_YARN_USER=%yarn% +) + +if not defined YARN_CONF_DIR ( + set YARN_CONF_DIR=%HADOOP_YARN_HOME%\conf +) + +if defined YARN_HEAPSIZE ( + @rem echo run with Java heapsize %YARN_HEAPSIZE% + set JAVA_HEAP_MAX=-Xmx%YARN_HEAPSIZE%m +) + +if not defined YARN_LOG_DIR ( + set YARN_LOG_DIR=%HADOOP_YARN_HOME%\logs +) + +if not defined YARN_LOGFILE ( + set YARN_LOGFILE=yarn.log +) + +@rem default policy file for service-level authorization +if not defined YARN_POLICYFILE ( + set YARN_POLICYFILE=hadoop-policy.xml +) + +if not defined YARN_ROOT_LOGGER ( + set YARN_ROOT_LOGGER=INFO,console +) + +set YARN_OPTS=%YARN_OPTS% -Dhadoop.log.dir=%YARN_LOG_DIR% +set YARN_OPTS=%YARN_OPTS% -Dyarn.log.dir=%YARN_LOG_DIR% +set YARN_OPTS=%YARN_OPTS% -Dhadoop.log.file=%YARN_LOGFILE% +set YARN_OPTS=%YARN_OPTS% -Dyarn.log.file=%YARN_LOGFILE% +set YARN_OPTS=%YARN_OPTS% -Dyarn.home.dir=%HADOOP_YARN_HOME% +set YARN_OPTS=%YARN_OPTS% -Dyarn.id.str=%YARN_IDENT_STRING% +set YARN_OPTS=%YARN_OPTS% -Dhadoop.home.dir=%HADOOP_YARN_HOME% +set YARN_OPTS=%YARN_OPTS% -Dhadoop.root.logger=%YARN_ROOT_LOGGER% +set YARN_OPTS=%YARN_OPTS% -Dyarn.root.logger=%YARN_ROOT_LOGGER% +if defined JAVA_LIBRARY_PATH ( + set YARN_OPTS=%YARN_OPTS% -Djava.library.path=%JAVA_LIBRARY_PATH% +) +set YARN_OPTS=%YARN_OPTS% -Dyarn.policy.file=%YARN_POLICYFILE% \ No newline at end of file diff --git a/aarch64/etc/hadoop/yarn-env.sh b/aarch64/etc/hadoop/yarn-env.sh new file mode 100644 index 0000000..cfce28d --- /dev/null +++ b/aarch64/etc/hadoop/yarn-env.sh @@ -0,0 +1,112 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# User for YARN daemons +export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn} + +# resolve links - $0 may be a softlink +export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}" + +# some Java parameters +# export JAVA_HOME=/home/y/libexec/jdk1.6.0/ +if [ "$JAVA_HOME" != "" ]; then + #echo "run java in $JAVA_HOME" + JAVA_HOME=$JAVA_HOME +fi + +if [ "$JAVA_HOME" = "" ]; then + echo "Error: JAVA_HOME is not set." + exit 1 +fi + +JAVA=$JAVA_HOME/bin/java +JAVA_HEAP_MAX=-Xmx1000m + +# For setting YARN specific HEAP sizes please use this +# Parameter and set appropriately +# YARN_HEAPSIZE=1000 + +# check envvars which might override default args +if [ "$YARN_HEAPSIZE" != "" ]; then + JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m" +fi + +# Resource Manager specific parameters + +# Specify the max Heapsize for the ResourceManager using a numerical value +# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set +# the value to 1000. +# This value will be overridden by an Xmx setting specified in either YARN_OPTS +# and/or YARN_RESOURCEMANAGER_OPTS. 
+# If not specified, the default value will be picked from either YARN_HEAPMAX +# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. +#export YARN_RESOURCEMANAGER_HEAPSIZE=1000 + +# Specify the JVM options to be used when starting the ResourceManager. +# These options will be appended to the options specified as YARN_OPTS +# and therefore may override any similar flags set in YARN_OPTS +#export YARN_RESOURCEMANAGER_OPTS= + +# Node Manager specific parameters + +# Specify the max Heapsize for the NodeManager using a numerical value +# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set +# the value to 1000. +# This value will be overridden by an Xmx setting specified in either YARN_OPTS +# and/or YARN_NODEMANAGER_OPTS. +# If not specified, the default value will be picked from either YARN_HEAPMAX +# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. +#export YARN_NODEMANAGER_HEAPSIZE=1000 + +# Specify the JVM options to be used when starting the NodeManager. +# These options will be appended to the options specified as YARN_OPTS +# and therefore may override any similar flags set in YARN_OPTS +#export YARN_NODEMANAGER_OPTS= + +# so that filenames w/ spaces are handled correctly in loops below +IFS= + + +# default log directory & file +if [ "$YARN_LOG_DIR" = "" ]; then + YARN_LOG_DIR="$HADOOP_YARN_HOME/logs" +fi +if [ "$YARN_LOGFILE" = "" ]; then + YARN_LOGFILE='yarn.log' +fi + +# default policy file for service-level authorization +if [ "$YARN_POLICYFILE" = "" ]; then + YARN_POLICYFILE="hadoop-policy.xml" +fi + +# restore ordinary behaviour +unset IFS + + +YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR" +YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR" +YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE" +YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE" +YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME" +YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING" +YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" +YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" +if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then + YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH" +fi +YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE" + + diff --git a/aarch64/etc/hadoop/yarn-site.xml b/aarch64/etc/hadoop/yarn-site.xml new file mode 100644 index 0000000..25292c7 --- /dev/null +++ b/aarch64/etc/hadoop/yarn-site.xml @@ -0,0 +1,19 @@ + + + + + + + diff --git a/aarch64/include/Pipes.hh b/aarch64/include/Pipes.hh new file mode 100644 index 0000000..b5d0ddd --- /dev/null +++ b/aarch64/include/Pipes.hh @@ -0,0 +1,260 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef HADOOP_PIPES_HH +#define HADOOP_PIPES_HH + +#ifdef SWIG +%module (directors="1") HadoopPipes +%include "std_string.i" +%feature("director") Mapper; +%feature("director") Reducer; +%feature("director") Partitioner; +%feature("director") RecordReader; +%feature("director") RecordWriter; +%feature("director") Factory; +#else +#include +#endif + +#include + +namespace HadoopPipes { + +/** + * This interface defines the interface between application code and the + * foreign code interface to Hadoop Map/Reduce. + */ + +/** + * A JobConf defines the properties for a job. + */ +class JobConf { +public: + virtual bool hasKey(const std::string& key) const = 0; + virtual const std::string& get(const std::string& key) const = 0; + virtual int getInt(const std::string& key) const = 0; + virtual float getFloat(const std::string& key) const = 0; + virtual bool getBoolean(const std::string&key) const = 0; + virtual ~JobConf() {} +}; + +/** + * Task context provides the information about the task and job. + */ +class TaskContext { +public: + /** + * Counter to keep track of a property and its value. + */ + class Counter { + private: + int id; + public: + Counter(int counterId) : id(counterId) {} + Counter(const Counter& counter) : id(counter.id) {} + + int getId() const { return id; } + }; + + /** + * Get the JobConf for the current task. + */ + virtual const JobConf* getJobConf() = 0; + + /** + * Get the current key. + * @return the current key + */ + virtual const std::string& getInputKey() = 0; + + /** + * Get the current value. + * @return the current value + */ + virtual const std::string& getInputValue() = 0; + + /** + * Generate an output record + */ + virtual void emit(const std::string& key, const std::string& value) = 0; + + /** + * Mark your task as having made progress without changing the status + * message. + */ + virtual void progress() = 0; + + /** + * Set the status message and call progress. + */ + virtual void setStatus(const std::string& status) = 0; + + /** + * Register a counter with the given group and name. + */ + virtual Counter* + getCounter(const std::string& group, const std::string& name) = 0; + + /** + * Increment the value of the counter with the given amount. + */ + virtual void incrementCounter(const Counter* counter, uint64_t amount) = 0; + + virtual ~TaskContext() {} +}; + +class MapContext: public TaskContext { +public: + + /** + * Access the InputSplit of the mapper. + */ + virtual const std::string& getInputSplit() = 0; + + /** + * Get the name of the key class of the input to this task. + */ + virtual const std::string& getInputKeyClass() = 0; + + /** + * Get the name of the value class of the input to this task. + */ + virtual const std::string& getInputValueClass() = 0; + +}; + +class ReduceContext: public TaskContext { +public: + /** + * Advance to the next value. + */ + virtual bool nextValue() = 0; +}; + +class Closable { +public: + virtual void close() {} + virtual ~Closable() {} +}; + +/** + * The application's mapper class to do map. + */ +class Mapper: public Closable { +public: + virtual void map(MapContext& context) = 0; +}; + +/** + * The application's reducer class to do reduce. + */ +class Reducer: public Closable { +public: + virtual void reduce(ReduceContext& context) = 0; +}; + +/** + * User code to decide where each key should be sent. 
+ */ +class Partitioner { +public: + virtual int partition(const std::string& key, int numOfReduces) = 0; + virtual ~Partitioner() {} +}; + +/** + * For applications that want to read the input directly for the map function + * they can define RecordReaders in C++. + */ +class RecordReader: public Closable { +public: + virtual bool next(std::string& key, std::string& value) = 0; + + /** + * The progress of the record reader through the split as a value between + * 0.0 and 1.0. + */ + virtual float getProgress() = 0; +}; + +/** + * An object to write key/value pairs as they are emited from the reduce. + */ +class RecordWriter: public Closable { +public: + virtual void emit(const std::string& key, + const std::string& value) = 0; +}; + +/** + * A factory to create the necessary application objects. + */ +class Factory { +public: + virtual Mapper* createMapper(MapContext& context) const = 0; + virtual Reducer* createReducer(ReduceContext& context) const = 0; + + /** + * Create a combiner, if this application has one. + * @return the new combiner or NULL, if one is not needed + */ + virtual Reducer* createCombiner(MapContext& context) const { + return NULL; + } + + /** + * Create an application partitioner object. + * @return the new partitioner or NULL, if the default partitioner should be + * used. + */ + virtual Partitioner* createPartitioner(MapContext& context) const { + return NULL; + } + + /** + * Create an application record reader. + * @return the new RecordReader or NULL, if the Java RecordReader should be + * used. + */ + virtual RecordReader* createRecordReader(MapContext& context) const { + return NULL; + } + + /** + * Create an application record writer. + * @return the new RecordWriter or NULL, if the Java RecordWriter should be + * used. + */ + virtual RecordWriter* createRecordWriter(ReduceContext& context) const { + return NULL; + } + + virtual ~Factory() {} +}; + +/** + * Run the assigned task in the framework. + * The user's main function should set the various functions using the + * set* functions above and then call this. + * @return true, if the task succeeded. + */ +bool runTask(const Factory& factory); + +} + +#endif diff --git a/aarch64/include/SerialUtils.hh b/aarch64/include/SerialUtils.hh new file mode 100644 index 0000000..cadfd76 --- /dev/null +++ b/aarch64/include/SerialUtils.hh @@ -0,0 +1,170 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef HADOOP_SERIAL_UTILS_HH +#define HADOOP_SERIAL_UTILS_HH + +#include +#include + +namespace HadoopUtils { + + /** + * A simple exception class that records a message for the user. + */ + class Error { + private: + std::string error; + public: + + /** + * Create an error object with the given message. 
+ */ + Error(const std::string& msg); + + /** + * Construct an error object with the given message that was created on + * the given file, line, and functino. + */ + Error(const std::string& msg, + const std::string& file, int line, const std::string& function); + + /** + * Get the error message. + */ + const std::string& getMessage() const; + }; + + /** + * Check to make sure that the condition is true, and throw an exception + * if it is not. The exception will contain the message and a description + * of the source location. + */ + #define HADOOP_ASSERT(CONDITION, MESSAGE) \ + { \ + if (!(CONDITION)) { \ + throw HadoopUtils::Error((MESSAGE), __FILE__, __LINE__, \ + __func__); \ + } \ + } + + /** + * An interface for an input stream. + */ + class InStream { + public: + /** + * Reads len bytes from the stream into the buffer. + * @param buf the buffer to read into + * @param buflen the length of the buffer + * @throws Error if there are problems reading + */ + virtual void read(void *buf, size_t len) = 0; + virtual ~InStream() {} + }; + + /** + * An interface for an output stream. + */ + class OutStream { + public: + /** + * Write the given buffer to the stream. + * @param buf the data to write + * @param len the number of bytes to write + * @throws Error if there are problems writing + */ + virtual void write(const void *buf, size_t len) = 0; + /** + * Flush the data to the underlying store. + */ + virtual void flush() = 0; + virtual ~OutStream() {} + }; + + /** + * A class to read a file as a stream. + */ + class FileInStream : public InStream { + public: + FileInStream(); + bool open(const std::string& name); + bool open(FILE* file); + void read(void *buf, size_t buflen); + bool skip(size_t nbytes); + bool close(); + virtual ~FileInStream(); + private: + /** + * The file to write to. + */ + FILE *mFile; + /** + * Does is this class responsible for closing the FILE*? + */ + bool isOwned; + }; + + /** + * A class to write a stream to a file. + */ + class FileOutStream: public OutStream { + public: + + /** + * Create a stream that isn't bound to anything. + */ + FileOutStream(); + + /** + * Create the given file, potentially overwriting an existing file. + */ + bool open(const std::string& name, bool overwrite); + bool open(FILE* file); + void write(const void* buf, size_t len); + bool advance(size_t nbytes); + void flush(); + bool close(); + virtual ~FileOutStream(); + private: + FILE *mFile; + bool isOwned; + }; + + /** + * A stream that reads from a string. + */ + class StringInStream: public InStream { + public: + StringInStream(const std::string& str); + virtual void read(void *buf, size_t buflen); + private: + const std::string& buffer; + std::string::const_iterator itr; + }; + + void serializeInt(int32_t t, OutStream& stream); + int32_t deserializeInt(InStream& stream); + void serializeLong(int64_t t, OutStream& stream); + int64_t deserializeLong(InStream& stream); + void serializeFloat(float t, OutStream& stream); + float deserializeFloat(InStream& stream); + void serializeString(const std::string& t, OutStream& stream); + void deserializeString(std::string& t, InStream& stream); +} + +#endif diff --git a/aarch64/include/StringUtils.hh b/aarch64/include/StringUtils.hh new file mode 100644 index 0000000..4720172 --- /dev/null +++ b/aarch64/include/StringUtils.hh @@ -0,0 +1,81 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef HADOOP_STRING_UTILS_HH +#define HADOOP_STRING_UTILS_HH + +#include +#include +#include + +namespace HadoopUtils { + + /** + * Convert an integer to a string. + */ + std::string toString(int32_t x); + + /** + * Convert a string to an integer. + * @throws Error if the string is not a valid integer + */ + int32_t toInt(const std::string& val); + + /** + * Convert the string to a float. + * @throws Error if the string is not a valid float + */ + float toFloat(const std::string& val); + + /** + * Convert the string to a boolean. + * @throws Error if the string is not a valid boolean value + */ + bool toBool(const std::string& val); + + /** + * Get the current time in the number of milliseconds since 1970. + */ + uint64_t getCurrentMillis(); + + /** + * Split a string into "words". Multiple deliminators are treated as a single + * word break, so no zero-length words are returned. + * @param str the string to split + * @param separator a list of characters that divide words + */ + std::vector splitString(const std::string& str, + const char* separator); + + /** + * Quote a string to avoid "\", non-printable characters, and the + * deliminators. + * @param str the string to quote + * @param deliminators the set of characters to always quote + */ + std::string quoteString(const std::string& str, + const char* deliminators); + + /** + * Unquote the given string to return the original string. + * @param str the string to unquote + */ + std::string unquoteString(const std::string& str); + +} + +#endif diff --git a/aarch64/include/TemplateFactory.hh b/aarch64/include/TemplateFactory.hh new file mode 100644 index 0000000..22e10ae --- /dev/null +++ b/aarch64/include/TemplateFactory.hh @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
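+ *
+ * (Aside: a brief sketch of the HadoopUtils string helpers declared just
+ * above in StringUtils.hh; illustrative only:
+ *
+ *   std::vector<std::string> words =
+ *       HadoopUtils::splitString("a,,b,c", ",");   // {"a","b","c"} -- no empty words
+ *   int32_t n     = HadoopUtils::toInt("42");
+ *   std::string s = HadoopUtils::toString(n);      // "42"
+ *   uint64_t now  = HadoopUtils::getCurrentMillis();
+ * )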
+ */ +#ifndef HADOOP_PIPES_TEMPLATE_FACTORY_HH +#define HADOOP_PIPES_TEMPLATE_FACTORY_HH + +namespace HadoopPipes { + + template + class TemplateFactory2: public Factory { + public: + Mapper* createMapper(MapContext& context) const { + return new mapper(context); + } + Reducer* createReducer(ReduceContext& context) const { + return new reducer(context); + } + }; + + template + class TemplateFactory3: public TemplateFactory2 { + public: + Partitioner* createPartitioner(MapContext& context) const { + return new partitioner(context); + } + }; + + template + class TemplateFactory3 + : public TemplateFactory2 { + }; + + template + class TemplateFactory4 + : public TemplateFactory3{ + public: + Reducer* createCombiner(MapContext& context) const { + return new combiner(context); + } + }; + + template + class TemplateFactory4 + : public TemplateFactory3{ + }; + + template + class TemplateFactory5 + : public TemplateFactory4{ + public: + RecordReader* createRecordReader(MapContext& context) const { + return new recordReader(context); + } + }; + + template + class TemplateFactory5 + : public TemplateFactory4{ + }; + + template + class TemplateFactory + : public TemplateFactory5{ + public: + RecordWriter* createRecordWriter(ReduceContext& context) const { + return new recordWriter(context); + } + }; + + template + class TemplateFactory + : public TemplateFactory5{ + }; + +} + +#endif diff --git a/aarch64/include/hdfs.h b/aarch64/include/hdfs.h new file mode 100644 index 0000000..1871665 --- /dev/null +++ b/aarch64/include/hdfs.h @@ -0,0 +1,692 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LIBHDFS_HDFS_H +#define LIBHDFS_HDFS_H + +#include /* for EINTERNAL, etc. */ +#include /* for O_RDONLY, O_WRONLY */ +#include /* for uint64_t, etc. */ +#include /* for time_t */ + +#ifndef O_RDONLY +#define O_RDONLY 1 +#endif + +#ifndef O_WRONLY +#define O_WRONLY 2 +#endif + +#ifndef EINTERNAL +#define EINTERNAL 255 +#endif + + +/** All APIs set errno to meaningful values */ + +#ifdef __cplusplus +extern "C" { +#endif + /** + * Some utility decls used in libhdfs. + */ + struct hdfsBuilder; + typedef int32_t tSize; /// size of data for read/write io ops + typedef time_t tTime; /// time type in seconds + typedef int64_t tOffset;/// offset within the file + typedef uint16_t tPort; /// port + typedef enum tObjectKind { + kObjectKindFile = 'F', + kObjectKindDirectory = 'D', + } tObjectKind; + + + /** + * The C reflection of org.apache.org.hadoop.FileSystem . + */ + struct hdfs_internal; + typedef struct hdfs_internal* hdfsFS; + + struct hdfsFile_internal; + typedef struct hdfsFile_internal* hdfsFile; + + /** + * Determine if a file is open for read. 
+ * + * @param file The HDFS file + * @return 1 if the file is open for read; 0 otherwise + */ + int hdfsFileIsOpenForRead(hdfsFile file); + + /** + * Determine if a file is open for write. + * + * @param file The HDFS file + * @return 1 if the file is open for write; 0 otherwise + */ + int hdfsFileIsOpenForWrite(hdfsFile file); + + struct hdfsReadStatistics { + uint64_t totalBytesRead; + uint64_t totalLocalBytesRead; + uint64_t totalShortCircuitBytesRead; + }; + + /** + * Get read statistics about a file. This is only applicable to files + * opened for reading. + * + * @param file The HDFS file + * @param stats (out parameter) on a successful return, the read + * statistics. Unchanged otherwise. You must free the + * returned statistics with hdfsFileFreeReadStatistics. + * @return 0 if the statistics were successfully returned, + * -1 otherwise. On a failure, please check errno against + * ENOTSUP. webhdfs, LocalFilesystem, and so forth may + * not support read statistics. + */ + int hdfsFileGetReadStatistics(hdfsFile file, + struct hdfsReadStatistics **stats); + + /** + * @param stats HDFS read statistics for a file. + * + * @return the number of remote bytes read. + */ + int64_t hdfsReadStatisticsGetRemoteBytesRead( + const struct hdfsReadStatistics *stats); + + /** + * Free some HDFS read statistics. + * + * @param stats The HDFS read statistics to free. + */ + void hdfsFileFreeReadStatistics(struct hdfsReadStatistics *stats); + + /** + * hdfsConnectAsUser - Connect to a hdfs file system as a specific user + * Connect to the hdfs. + * @param nn The NameNode. See hdfsBuilderSetNameNode for details. + * @param port The port on which the server is listening. + * @param user the user name (this is hadoop domain user). Or NULL is equivelant to hhdfsConnect(host, port) + * @return Returns a handle to the filesystem or NULL on error. + * @deprecated Use hdfsBuilderConnect instead. + */ + hdfsFS hdfsConnectAsUser(const char* nn, tPort port, const char *user); + + /** + * hdfsConnect - Connect to a hdfs file system. + * Connect to the hdfs. + * @param nn The NameNode. See hdfsBuilderSetNameNode for details. + * @param port The port on which the server is listening. + * @return Returns a handle to the filesystem or NULL on error. + * @deprecated Use hdfsBuilderConnect instead. + */ + hdfsFS hdfsConnect(const char* nn, tPort port); + + /** + * hdfsConnect - Connect to an hdfs file system. + * + * Forces a new instance to be created + * + * @param nn The NameNode. See hdfsBuilderSetNameNode for details. + * @param port The port on which the server is listening. + * @param user The user name to use when connecting + * @return Returns a handle to the filesystem or NULL on error. + * @deprecated Use hdfsBuilderConnect instead. + */ + hdfsFS hdfsConnectAsUserNewInstance(const char* nn, tPort port, const char *user ); + + /** + * hdfsConnect - Connect to an hdfs file system. + * + * Forces a new instance to be created + * + * @param nn The NameNode. See hdfsBuilderSetNameNode for details. + * @param port The port on which the server is listening. + * @return Returns a handle to the filesystem or NULL on error. + * @deprecated Use hdfsBuilderConnect instead. + */ + hdfsFS hdfsConnectNewInstance(const char* nn, tPort port); + + /** + * Connect to HDFS using the parameters defined by the builder. + * + * The HDFS builder will be freed, whether or not the connection was + * successful. 
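+ *
+ * (Aside: a usage sketch for the read-statistics API declared above --
+ * `file` stands for an already-open hdfsFile and error handling is omitted:
+ *
+ *   struct hdfsReadStatistics *stats = NULL;
+ *   if (hdfsFileGetReadStatistics(file, &stats) == 0) {
+ *     int64_t remote = hdfsReadStatisticsGetRemoteBytesRead(stats);
+ *     // totalBytesRead, totalLocalBytesRead and totalShortCircuitBytesRead
+ *     // are available directly on the struct.
+ *     hdfsFileFreeReadStatistics(stats);
+ *   }
+ * )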
+ * + * Every successful call to hdfsBuilderConnect should be matched with a call + * to hdfsDisconnect, when the hdfsFS is no longer needed. + * + * @param bld The HDFS builder + * @return Returns a handle to the filesystem, or NULL on error. + */ + hdfsFS hdfsBuilderConnect(struct hdfsBuilder *bld); + + /** + * Create an HDFS builder. + * + * @return The HDFS builder, or NULL on error. + */ + struct hdfsBuilder *hdfsNewBuilder(void); + + /** + * Force the builder to always create a new instance of the FileSystem, + * rather than possibly finding one in the cache. + * + * @param bld The HDFS builder + */ + void hdfsBuilderSetForceNewInstance(struct hdfsBuilder *bld); + + /** + * Set the HDFS NameNode to connect to. + * + * @param bld The HDFS builder + * @param nn The NameNode to use. + * + * If the string given is 'default', the default NameNode + * configuration will be used (from the XML configuration files) + * + * If NULL is given, a LocalFileSystem will be created. + * + * If the string starts with a protocol type such as file:// or + * hdfs://, this protocol type will be used. If not, the + * hdfs:// protocol type will be used. + * + * You may specify a NameNode port in the usual way by + * passing a string of the format hdfs://:. + * Alternately, you may set the port with + * hdfsBuilderSetNameNodePort. However, you must not pass the + * port in two different ways. + */ + void hdfsBuilderSetNameNode(struct hdfsBuilder *bld, const char *nn); + + /** + * Set the port of the HDFS NameNode to connect to. + * + * @param bld The HDFS builder + * @param port The port. + */ + void hdfsBuilderSetNameNodePort(struct hdfsBuilder *bld, tPort port); + + /** + * Set the username to use when connecting to the HDFS cluster. + * + * @param bld The HDFS builder + * @param userName The user name. The string will be shallow-copied. + */ + void hdfsBuilderSetUserName(struct hdfsBuilder *bld, const char *userName); + + /** + * Set the path to the Kerberos ticket cache to use when connecting to + * the HDFS cluster. + * + * @param bld The HDFS builder + * @param kerbTicketCachePath The Kerberos ticket cache path. The string + * will be shallow-copied. + */ + void hdfsBuilderSetKerbTicketCachePath(struct hdfsBuilder *bld, + const char *kerbTicketCachePath); + + /** + * Free an HDFS builder. + * + * It is normally not necessary to call this function since + * hdfsBuilderConnect frees the builder. + * + * @param bld The HDFS builder + */ + void hdfsFreeBuilder(struct hdfsBuilder *bld); + + /** + * Set a configuration string for an HdfsBuilder. + * + * @param key The key to set. + * @param val The value, or NULL to set no value. + * This will be shallow-copied. You are responsible for + * ensuring that it remains valid until the builder is + * freed. + * + * @return 0 on success; nonzero error code otherwise. + */ + int hdfsBuilderConfSetStr(struct hdfsBuilder *bld, const char *key, + const char *val); + + /** + * Get a configuration string. + * + * @param key The key to find + * @param val (out param) The value. This will be set to NULL if the + * key isn't found. You must free this string with + * hdfsConfStrFree. + * + * @return 0 on success; nonzero error code otherwise. + * Failure to find the key is not an error. + */ + int hdfsConfGetStr(const char *key, char **val); + + /** + * Get a configuration integer. + * + * @param key The key to find + * @param val (out param) The value. This will NOT be changed if the + * key isn't found. + * + * @return 0 on success; nonzero error code otherwise. 
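+ *
+ * (Aside: a sketch of the builder-based connection flow described above --
+ * the NameNode string and user name are placeholders:
+ *
+ *   struct hdfsBuilder *bld = hdfsNewBuilder();
+ *   hdfsBuilderSetNameNode(bld, "default");   // take it from the XML config
+ *   hdfsBuilderSetUserName(bld, "hdfs");      // placeholder user
+ *   hdfsFS fs = hdfsBuilderConnect(bld);      // frees the builder, see above
+ *   ...
+ *   hdfsDisconnect(fs);                       // once the handle is done with
+ * )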
+ * Failure to find the key is not an error. + */ + int hdfsConfGetInt(const char *key, int32_t *val); + + /** + * Free a configuration string found with hdfsConfGetStr. + * + * @param val A configuration string obtained from hdfsConfGetStr + */ + void hdfsConfStrFree(char *val); + + /** + * hdfsDisconnect - Disconnect from the hdfs file system. + * Disconnect from hdfs. + * @param fs The configured filesystem handle. + * @return Returns 0 on success, -1 on error. + * Even if there is an error, the resources associated with the + * hdfsFS will be freed. + */ + int hdfsDisconnect(hdfsFS fs); + + + /** + * hdfsOpenFile - Open a hdfs file in given mode. + * @param fs The configured filesystem handle. + * @param path The full path to the file. + * @param flags - an | of bits/fcntl.h file flags - supported flags are O_RDONLY, O_WRONLY (meaning create or overwrite i.e., implies O_TRUNCAT), + * O_WRONLY|O_APPEND. Other flags are generally ignored other than (O_RDWR || (O_EXCL & O_CREAT)) which return NULL and set errno equal ENOTSUP. + * @param bufferSize Size of buffer for read/write - pass 0 if you want + * to use the default configured values. + * @param replication Block replication - pass 0 if you want to use + * the default configured values. + * @param blocksize Size of block - pass 0 if you want to use the + * default configured values. + * @return Returns the handle to the open file or NULL on error. + */ + hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags, + int bufferSize, short replication, tSize blocksize); + + + /** + * hdfsCloseFile - Close an open file. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @return Returns 0 on success, -1 on error. + * On error, errno will be set appropriately. + * If the hdfs file was valid, the memory associated with it will + * be freed at the end of this call, even if there was an I/O + * error. + */ + int hdfsCloseFile(hdfsFS fs, hdfsFile file); + + + /** + * hdfsExists - Checks if a given path exsits on the filesystem + * @param fs The configured filesystem handle. + * @param path The path to look for + * @return Returns 0 on success, -1 on error. + */ + int hdfsExists(hdfsFS fs, const char *path); + + + /** + * hdfsSeek - Seek to given offset in file. + * This works only for files opened in read-only mode. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @param desiredPos Offset into the file to seek into. + * @return Returns 0 on success, -1 on error. + */ + int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos); + + + /** + * hdfsTell - Get the current offset in the file, in bytes. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @return Current offset, -1 on error. + */ + tOffset hdfsTell(hdfsFS fs, hdfsFile file); + + + /** + * hdfsRead - Read data from an open file. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @param buffer The buffer to copy read bytes into. + * @param length The length of the buffer. + * @return On success, a positive number indicating how many bytes + * were read. + * On end-of-file, 0. + * On error, -1. Errno will be set to the error code. + * Just like the POSIX read function, hdfsRead will return -1 + * and set errno to EINTR if data is temporarily unavailable, + * but we are not yet at the end of the file. + */ + tSize hdfsRead(hdfsFS fs, hdfsFile file, void* buffer, tSize length); + + /** + * hdfsPread - Positional read of data from an open file. 
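+ *
+ * (Aside: a read-path sketch using the calls declared above; the path is a
+ * placeholder and error handling is omitted:
+ *
+ *   if (hdfsExists(fs, "/tmp/example.txt") == 0) {      // 0 = path exists
+ *     hdfsFile f = hdfsOpenFile(fs, "/tmp/example.txt", O_RDONLY, 0, 0, 0);
+ *     char buf[4096];
+ *     tSize n = hdfsRead(fs, f, buf, sizeof(buf));      // 0 at end-of-file
+ *     hdfsCloseFile(fs, f);
+ *   }
+ * )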
+ * @param fs The configured filesystem handle. + * @param file The file handle. + * @param position Position from which to read + * @param buffer The buffer to copy read bytes into. + * @param length The length of the buffer. + * @return See hdfsRead + */ + tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position, + void* buffer, tSize length); + + + /** + * hdfsWrite - Write data into an open file. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @param buffer The data. + * @param length The no. of bytes to write. + * @return Returns the number of bytes written, -1 on error. + */ + tSize hdfsWrite(hdfsFS fs, hdfsFile file, const void* buffer, + tSize length); + + + /** + * hdfsWrite - Flush the data. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @return Returns 0 on success, -1 on error. + */ + int hdfsFlush(hdfsFS fs, hdfsFile file); + + + /** + * hdfsHFlush - Flush out the data in client's user buffer. After the + * return of this call, new readers will see the data. + * @param fs configured filesystem handle + * @param file file handle + * @return 0 on success, -1 on error and sets errno + */ + int hdfsHFlush(hdfsFS fs, hdfsFile file); + + + /** + * hdfsHSync - Similar to posix fsync, Flush out the data in client's + * user buffer. all the way to the disk device (but the disk may have + * it in its cache). + * @param fs configured filesystem handle + * @param file file handle + * @return 0 on success, -1 on error and sets errno + */ + int hdfsHSync(hdfsFS fs, hdfsFile file); + + + /** + * hdfsAvailable - Number of bytes that can be read from this + * input stream without blocking. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @return Returns available bytes; -1 on error. + */ + int hdfsAvailable(hdfsFS fs, hdfsFile file); + + + /** + * hdfsCopy - Copy file from one filesystem to another. + * @param srcFS The handle to source filesystem. + * @param src The path of source file. + * @param dstFS The handle to destination filesystem. + * @param dst The path of destination file. + * @return Returns 0 on success, -1 on error. + */ + int hdfsCopy(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst); + + + /** + * hdfsMove - Move file from one filesystem to another. + * @param srcFS The handle to source filesystem. + * @param src The path of source file. + * @param dstFS The handle to destination filesystem. + * @param dst The path of destination file. + * @return Returns 0 on success, -1 on error. + */ + int hdfsMove(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst); + + + /** + * hdfsDelete - Delete file. + * @param fs The configured filesystem handle. + * @param path The path of the file. + * @param recursive if path is a directory and set to + * non-zero, the directory is deleted else throws an exception. In + * case of a file the recursive argument is irrelevant. + * @return Returns 0 on success, -1 on error. + */ + int hdfsDelete(hdfsFS fs, const char* path, int recursive); + + /** + * hdfsRename - Rename file. + * @param fs The configured filesystem handle. + * @param oldPath The path of the source file. + * @param newPath The path of the destination file. + * @return Returns 0 on success, -1 on error. + */ + int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath); + + + /** + * hdfsGetWorkingDirectory - Get the current working directory for + * the given filesystem. + * @param fs The configured filesystem handle. 
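+ *
+ * (Aside: a write-path sketch using the calls declared above; the path is a
+ * placeholder and error handling is omitted:
+ *
+ *   hdfsFile f = hdfsOpenFile(fs, "/tmp/example.txt", O_WRONLY, 0, 0, 0);
+ *   const char msg[] = "hello, hdfs\n";
+ *   hdfsWrite(fs, f, msg, (tSize)(sizeof(msg) - 1));
+ *   hdfsHFlush(fs, f);               // make the data visible to new readers
+ *   hdfsCloseFile(fs, f);
+ * )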
+ * @param buffer The user-buffer to copy path of cwd into. + * @param bufferSize The length of user-buffer. + * @return Returns buffer, NULL on error. + */ + char* hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize); + + + /** + * hdfsSetWorkingDirectory - Set the working directory. All relative + * paths will be resolved relative to it. + * @param fs The configured filesystem handle. + * @param path The path of the new 'cwd'. + * @return Returns 0 on success, -1 on error. + */ + int hdfsSetWorkingDirectory(hdfsFS fs, const char* path); + + + /** + * hdfsCreateDirectory - Make the given file and all non-existent + * parents into directories. + * @param fs The configured filesystem handle. + * @param path The path of the directory. + * @return Returns 0 on success, -1 on error. + */ + int hdfsCreateDirectory(hdfsFS fs, const char* path); + + + /** + * hdfsSetReplication - Set the replication of the specified + * file to the supplied value + * @param fs The configured filesystem handle. + * @param path The path of the file. + * @return Returns 0 on success, -1 on error. + */ + int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication); + + + /** + * hdfsFileInfo - Information about a file/directory. + */ + typedef struct { + tObjectKind mKind; /* file or directory */ + char *mName; /* the name of the file */ + tTime mLastMod; /* the last modification time for the file in seconds */ + tOffset mSize; /* the size of the file in bytes */ + short mReplication; /* the count of replicas */ + tOffset mBlockSize; /* the block size for the file */ + char *mOwner; /* the owner of the file */ + char *mGroup; /* the group associated with the file */ + short mPermissions; /* the permissions associated with the file */ + tTime mLastAccess; /* the last access time for the file in seconds */ + } hdfsFileInfo; + + + /** + * hdfsListDirectory - Get list of files/directories for a given + * directory-path. hdfsFreeFileInfo should be called to deallocate memory. + * @param fs The configured filesystem handle. + * @param path The path of the directory. + * @param numEntries Set to the number of files/directories in path. + * @return Returns a dynamically-allocated array of hdfsFileInfo + * objects; NULL on error. + */ + hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char* path, + int *numEntries); + + + /** + * hdfsGetPathInfo - Get information about a path as a (dynamically + * allocated) single hdfsFileInfo struct. hdfsFreeFileInfo should be + * called when the pointer is no longer needed. + * @param fs The configured filesystem handle. + * @param path The path of the file. + * @return Returns a dynamically-allocated hdfsFileInfo object; + * NULL on error. + */ + hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path); + + + /** + * hdfsFreeFileInfo - Free up the hdfsFileInfo array (including fields) + * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo + * objects. + * @param numEntries The size of the array. + */ + void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries); + + + /** + * hdfsGetHosts - Get hostnames where a particular block (determined by + * pos & blocksize) of a file is stored. The last element in the array + * is NULL. Due to replication, a single block could be present on + * multiple hosts. + * @param fs The configured filesystem handle. + * @param path The path of the file. + * @param start The start of the block. + * @param length The length of the block. 
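+ *
+ * (Aside: a directory-listing sketch using the calls declared above; the
+ * path is a placeholder and error handling is omitted:
+ *
+ *   int n = 0;
+ *   hdfsFileInfo *entries = hdfsListDirectory(fs, "/tmp", &n);
+ *   for (int i = 0; entries != NULL && i < n; ++i)
+ *     printf("%c %s %lld bytes\n", (char)entries[i].mKind,
+ *            entries[i].mName, (long long)entries[i].mSize);
+ *   hdfsFreeFileInfo(entries, n);
+ * )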
+ * @return Returns a dynamically-allocated 2-d array of blocks-hosts; + * NULL on error. + */ + char*** hdfsGetHosts(hdfsFS fs, const char* path, + tOffset start, tOffset length); + + + /** + * hdfsFreeHosts - Free up the structure returned by hdfsGetHosts + * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo + * objects. + * @param numEntries The size of the array. + */ + void hdfsFreeHosts(char ***blockHosts); + + + /** + * hdfsGetDefaultBlockSize - Get the default blocksize. + * + * @param fs The configured filesystem handle. + * @deprecated Use hdfsGetDefaultBlockSizeAtPath instead. + * + * @return Returns the default blocksize, or -1 on error. + */ + tOffset hdfsGetDefaultBlockSize(hdfsFS fs); + + + /** + * hdfsGetDefaultBlockSizeAtPath - Get the default blocksize at the + * filesystem indicated by a given path. + * + * @param fs The configured filesystem handle. + * @param path The given path will be used to locate the actual + * filesystem. The full path does not have to exist. + * + * @return Returns the default blocksize, or -1 on error. + */ + tOffset hdfsGetDefaultBlockSizeAtPath(hdfsFS fs, const char *path); + + + /** + * hdfsGetCapacity - Return the raw capacity of the filesystem. + * @param fs The configured filesystem handle. + * @return Returns the raw-capacity; -1 on error. + */ + tOffset hdfsGetCapacity(hdfsFS fs); + + + /** + * hdfsGetUsed - Return the total raw size of all files in the filesystem. + * @param fs The configured filesystem handle. + * @return Returns the total-size; -1 on error. + */ + tOffset hdfsGetUsed(hdfsFS fs); + + /** + * Change the user and/or group of a file or directory. + * + * @param fs The configured filesystem handle. + * @param path the path to the file or directory + * @param owner User string. Set to NULL for 'no change' + * @param group Group string. Set to NULL for 'no change' + * @return 0 on success else -1 + */ + int hdfsChown(hdfsFS fs, const char* path, const char *owner, + const char *group); + + /** + * hdfsChmod + * @param fs The configured filesystem handle. + * @param path the path to the file or directory + * @param mode the bitmask to set it to + * @return 0 on success else -1 + */ + int hdfsChmod(hdfsFS fs, const char* path, short mode); + + /** + * hdfsUtime + * @param fs The configured filesystem handle. 
+ * @param path the path to the file or directory + * @param mtime new modification time or -1 for no change + * @param atime new access time or -1 for no change + * @return 0 on success else -1 + */ + int hdfsUtime(hdfsFS fs, const char* path, tTime mtime, tTime atime); + +#ifdef __cplusplus +} +#endif + +#endif /*LIBHDFS_HDFS_H*/ + +/** + * vim: ts=4: sw=4: et + */ diff --git a/aarch64/lib/native/libhadoop.a b/aarch64/lib/native/libhadoop.a new file mode 100644 index 0000000..28ffd82 Binary files /dev/null and b/aarch64/lib/native/libhadoop.a differ diff --git a/aarch64/lib/native/libhadoop.so b/aarch64/lib/native/libhadoop.so new file mode 120000 index 0000000..e9aafc2 --- /dev/null +++ b/aarch64/lib/native/libhadoop.so @@ -0,0 +1 @@ +libhadoop.so.1.0.0 \ No newline at end of file diff --git a/aarch64/lib/native/libhadoop.so.1.0.0 b/aarch64/lib/native/libhadoop.so.1.0.0 new file mode 100755 index 0000000..c516c9e Binary files /dev/null and b/aarch64/lib/native/libhadoop.so.1.0.0 differ diff --git a/aarch64/lib/native/libhadooppipes.a b/aarch64/lib/native/libhadooppipes.a new file mode 100644 index 0000000..6f8eab2 Binary files /dev/null and b/aarch64/lib/native/libhadooppipes.a differ diff --git a/aarch64/lib/native/libhadooputils.a b/aarch64/lib/native/libhadooputils.a new file mode 100644 index 0000000..e2bfa57 Binary files /dev/null and b/aarch64/lib/native/libhadooputils.a differ diff --git a/aarch64/lib/native/libhdfs.a b/aarch64/lib/native/libhdfs.a new file mode 100644 index 0000000..845dc91 Binary files /dev/null and b/aarch64/lib/native/libhdfs.a differ diff --git a/aarch64/lib/native/libhdfs.so b/aarch64/lib/native/libhdfs.so new file mode 120000 index 0000000..2f587b5 --- /dev/null +++ b/aarch64/lib/native/libhdfs.so @@ -0,0 +1 @@ +libhdfs.so.0.0.0 \ No newline at end of file diff --git a/aarch64/lib/native/libhdfs.so.0.0.0 b/aarch64/lib/native/libhdfs.so.0.0.0 new file mode 100755 index 0000000..a134032 Binary files /dev/null and b/aarch64/lib/native/libhdfs.so.0.0.0 differ diff --git a/aarch64/libexec/hadoop-config.cmd b/aarch64/libexec/hadoop-config.cmd new file mode 100755 index 0000000..3e6e457 --- /dev/null +++ b/aarch64/libexec/hadoop-config.cmd @@ -0,0 +1,292 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+ +@rem included in all the hadoop scripts with source command +@rem should not be executable directly +@rem also should not be passed any arguments, since we need original %* + +if not defined HADOOP_COMMON_DIR ( + set HADOOP_COMMON_DIR=share\hadoop\common +) +if not defined HADOOP_COMMON_LIB_JARS_DIR ( + set HADOOP_COMMON_LIB_JARS_DIR=share\hadoop\common\lib +) +if not defined HADOOP_COMMON_LIB_NATIVE_DIR ( + set HADOOP_COMMON_LIB_NATIVE_DIR=lib\native +) +if not defined HDFS_DIR ( + set HDFS_DIR=share\hadoop\hdfs +) +if not defined HDFS_LIB_JARS_DIR ( + set HDFS_LIB_JARS_DIR=share\hadoop\hdfs\lib +) +if not defined YARN_DIR ( + set YARN_DIR=share\hadoop\yarn +) +if not defined YARN_LIB_JARS_DIR ( + set YARN_LIB_JARS_DIR=share\hadoop\yarn\lib +) +if not defined MAPRED_DIR ( + set MAPRED_DIR=share\hadoop\mapreduce +) +if not defined MAPRED_LIB_JARS_DIR ( + set MAPRED_LIB_JARS_DIR=share\hadoop\mapreduce\lib +) + +@rem the root of the Hadoop installation +set HADOOP_HOME=%~dp0 +for %%i in (%HADOOP_HOME%.) do ( + set HADOOP_HOME=%%~dpi +) +if "%HADOOP_HOME:~-1%" == "\" ( + set HADOOP_HOME=%HADOOP_HOME:~0,-1% +) + +if not exist %HADOOP_HOME%\share\hadoop\common\hadoop-common-*.jar ( + @echo +================================================================+ + @echo ^| Error: HADOOP_HOME is not set correctly ^| + @echo +----------------------------------------------------------------+ + @echo ^| Please set your HADOOP_HOME variable to the absolute path of ^| + @echo ^| the directory that contains the hadoop distribution ^| + @echo +================================================================+ + exit /b 1 +) + +set HADOOP_CONF_DIR=%HADOOP_HOME%\etc\hadoop + +@rem +@rem Allow alternate conf dir location. +@rem + +if "%1" == "--config" ( + set HADOOP_CONF_DIR=%2 + shift + shift +) + +@rem +@rem check to see it is specified whether to use the slaves or the +@rem masters file +@rem + +if "%1" == "--hosts" ( + set HADOOP_SLAVES=%HADOOP_CONF_DIR%\%2 + shift + shift +) + +if exist %HADOOP_CONF_DIR%\hadoop-env.cmd ( + call %HADOOP_CONF_DIR%\hadoop-env.cmd +) + +@rem +@rem setup java environment variables +@rem + +if not defined JAVA_HOME ( + echo Error: JAVA_HOME is not set. + goto :eof +) + +if not exist %JAVA_HOME%\bin\java.exe ( + echo Error: JAVA_HOME is incorrectly set. 
+ echo Please update %HADOOP_HOME%\conf\hadoop-env.cmd + goto :eof +) + +set JAVA=%JAVA_HOME%\bin\java +@rem some Java parameters +set JAVA_HEAP_MAX=-Xmx1000m + +@rem +@rem check envvars which might override default args +@rem + +if defined HADOOP_HEAPSIZE ( + set JAVA_HEAP_MAX=-Xmx%HADOOP_HEAPSIZE%m +) + +@rem +@rem CLASSPATH initially contains %HADOOP_CONF_DIR% +@rem + +set CLASSPATH=%HADOOP_CONF_DIR% + +if not defined HADOOP_COMMON_HOME ( + if exist %HADOOP_HOME%\share\hadoop\common ( + set HADOOP_COMMON_HOME=%HADOOP_HOME% + ) +) + +@rem +@rem for releases, add core hadoop jar & webapps to CLASSPATH +@rem + +if exist %HADOOP_COMMON_HOME%\%HADOOP_COMMON_DIR%\webapps ( + set CLASSPATH=!CLASSPATH!;%HADOOP_COMMON_HOME%\%HADOOP_COMMON_DIR% +) + +if exist %HADOOP_COMMON_HOME%\%HADOOP_COMMON_LIB_JARS_DIR% ( + set CLASSPATH=!CLASSPATH!;%HADOOP_COMMON_HOME%\%HADOOP_COMMON_LIB_JARS_DIR%\* +) + +set CLASSPATH=!CLASSPATH!;%HADOOP_COMMON_HOME%\%HADOOP_COMMON_DIR%\* + +@rem +@rem add user-specified CLASSPATH last +@rem + +if defined HADOOP_CLASSPATH ( + if defined HADOOP_USER_CLASSPATH_FIRST ( + set CLASSPATH=%HADOOP_CLASSPATH%;%CLASSPATH%; + ) else ( + set CLASSPATH=%CLASSPATH%;%HADOOP_CLASSPATH%; + ) +) + +@rem +@rem default log directory % file +@rem + +if not defined HADOOP_LOG_DIR ( + set HADOOP_LOG_DIR=%HADOOP_HOME%\logs +) + +if not defined HADOOP_LOGFILE ( + set HADOOP_LOGFILE=hadoop.log +) + +if not defined HADOOP_ROOT_LOGGER ( + set HADOOP_ROOT_LOGGER=INFO,console +) + +@rem +@rem default policy file for service-level authorization +@rem + +if not defined HADOOP_POLICYFILE ( + set HADOOP_POLICYFILE=hadoop-policy.xml +) + +@rem +@rem Determine the JAVA_PLATFORM +@rem + +for /f "delims=" %%A in ('%JAVA% -Xmx32m %HADOOP_JAVA_PLATFORM_OPTS% -classpath "%CLASSPATH%" org.apache.hadoop.util.PlatformName') do set JAVA_PLATFORM=%%A +@rem replace space with underscore +set JAVA_PLATFORM=%JAVA_PLATFORM: =_% + +@rem +@rem setup 'java.library.path' for native hadoop code if necessary +@rem + +@rem Check if we're running hadoop directly from the build +set JAVA_LIBRARY_PATH= +if exist %HADOOP_COMMON_HOME%\target\bin ( + set JAVA_LIBRARY_PATH=%HADOOP_COMMON_HOME%\target\bin +) + +@rem For the distro case, check the bin folder +if exist %HADOOP_COMMON_HOME%\bin ( + set JAVA_LIBRARY_PATH=%JAVA_LIBRARY_PATH%;%HADOOP_COMMON_HOME%\bin +) + +@rem +@rem setup a default TOOL_PATH +@rem +set TOOL_PATH=%HADOOP_HOME%\share\hadoop\tools\lib\* + +set HADOOP_OPTS=%HADOOP_OPTS% -Dhadoop.log.dir=%HADOOP_LOG_DIR% +set HADOOP_OPTS=%HADOOP_OPTS% -Dhadoop.log.file=%HADOOP_LOGFILE% +set HADOOP_OPTS=%HADOOP_OPTS% -Dhadoop.home.dir=%HADOOP_HOME% +set HADOOP_OPTS=%HADOOP_OPTS% -Dhadoop.id.str=%HADOOP_IDENT_STRING% +set HADOOP_OPTS=%HADOOP_OPTS% -Dhadoop.root.logger=%HADOOP_ROOT_LOGGER% + +if defined JAVA_LIBRARY_PATH ( + set HADOOP_OPTS=%HADOOP_OPTS% -Djava.library.path=%JAVA_LIBRARY_PATH% +) +set HADOOP_OPTS=%HADOOP_OPTS% -Dhadoop.policy.file=%HADOOP_POLICYFILE% + +@rem +@rem Disable ipv6 as it can cause issues +@rem + +set HADOOP_OPTS=%HADOOP_OPTS% -Djava.net.preferIPv4Stack=true + +@rem +@rem put hdfs in classpath if present +@rem + +if not defined HADOOP_HDFS_HOME ( + if exist %HADOOP_HOME%\%HDFS_DIR% ( + set HADOOP_HDFS_HOME=%HADOOP_HOME% + ) +) + +if exist %HADOOP_HDFS_HOME%\%HDFS_DIR%\webapps ( + set CLASSPATH=!CLASSPATH!;%HADOOP_HDFS_HOME%\%HDFS_DIR% +) + +if exist %HADOOP_HDFS_HOME%\%HDFS_LIB_JARS_DIR% ( + set CLASSPATH=!CLASSPATH!;%HADOOP_HDFS_HOME%\%HDFS_LIB_JARS_DIR%\* +) + +set 
CLASSPATH=!CLASSPATH!;%HADOOP_HDFS_HOME%\%HDFS_DIR%\* + +@rem +@rem put yarn in classpath if present +@rem + +if not defined HADOOP_YARN_HOME ( + if exist %HADOOP_HOME%\%YARN_DIR% ( + set HADOOP_YARN_HOME=%HADOOP_HOME% + ) +) + +if exist %HADOOP_YARN_HOME%\%YARN_DIR%\webapps ( + set CLASSPATH=!CLASSPATH!;%HADOOP_YARN_HOME%\%YARN_DIR% +) + +if exist %HADOOP_YARN_HOME%\%YARN_LIB_JARS_DIR% ( + set CLASSPATH=!CLASSPATH!;%HADOOP_YARN_HOME%\%YARN_LIB_JARS_DIR%\* +) + +set CLASSPATH=!CLASSPATH!;%HADOOP_YARN_HOME%\%YARN_DIR%\* + +@rem +@rem put mapred in classpath if present AND different from YARN +@rem + +if not defined HADOOP_MAPRED_HOME ( + if exist %HADOOP_HOME%\%MAPRED_DIR% ( + set HADOOP_MAPRED_HOME=%HADOOP_HOME% + ) +) + +if not "%HADOOP_MAPRED_HOME%\%MAPRED_DIR%" == "%HADOOP_YARN_HOME%\%YARN_DIR%" ( + + if exist %HADOOP_MAPRED_HOME%\%MAPRED_DIR%\webapps ( + set CLASSPATH=!CLASSPATH!;%HADOOP_MAPRED_HOME%\%MAPRED_DIR% + ) + + if exist %HADOOP_MAPRED_HOME%\%MAPRED_LIB_JARS_DIR% ( + set CLASSPATH=!CLASSPATH!;%HADOOP_MAPRED_HOME%\%MAPRED_LIB_JARS_DIR%\* + ) + + set CLASSPATH=!CLASSPATH!;%HADOOP_MAPRED_HOME%\%MAPRED_DIR%\* +) + +:eof diff --git a/aarch64/libexec/hadoop-config.sh b/aarch64/libexec/hadoop-config.sh new file mode 100755 index 0000000..e5c40fc --- /dev/null +++ b/aarch64/libexec/hadoop-config.sh @@ -0,0 +1,295 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# included in all the hadoop scripts with source command +# should not be executable directly +# also should not be passed any arguments, since we need original $* + +# Resolve links ($0 may be a softlink) and convert a relative path +# to an absolute path. NB: The -P option requires bash built-ins +# or POSIX:2001 compliant cd and pwd. + +# HADOOP_CLASSPATH Extra Java CLASSPATH entries. +# +# HADOOP_USER_CLASSPATH_FIRST When defined, the HADOOP_CLASSPATH is +# added in the beginning of the global +# classpath. Can be defined, for example, +# by doing +# export HADOOP_USER_CLASSPATH_FIRST=true +# + +this="${BASH_SOURCE-$0}" +common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P) +script="$(basename -- "$this")" +this="$common_bin/$script" + +[ -f "$common_bin/hadoop-layout.sh" ] && . 
"$common_bin/hadoop-layout.sh" + +HADOOP_COMMON_DIR=${HADOOP_COMMON_DIR:-"share/hadoop/common"} +HADOOP_COMMON_LIB_JARS_DIR=${HADOOP_COMMON_LIB_JARS_DIR:-"share/hadoop/common/lib"} +HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_COMMON_LIB_NATIVE_DIR:-"lib/native"} +HDFS_DIR=${HDFS_DIR:-"share/hadoop/hdfs"} +HDFS_LIB_JARS_DIR=${HDFS_LIB_JARS_DIR:-"share/hadoop/hdfs/lib"} +YARN_DIR=${YARN_DIR:-"share/hadoop/yarn"} +YARN_LIB_JARS_DIR=${YARN_LIB_JARS_DIR:-"share/hadoop/yarn/lib"} +MAPRED_DIR=${MAPRED_DIR:-"share/hadoop/mapreduce"} +MAPRED_LIB_JARS_DIR=${MAPRED_LIB_JARS_DIR:-"share/hadoop/mapreduce/lib"} + +# the root of the Hadoop installation +# See HADOOP-6255 for directory structure layout +HADOOP_DEFAULT_PREFIX=$(cd -P -- "$common_bin"/.. && pwd -P) +HADOOP_PREFIX=${HADOOP_PREFIX:-$HADOOP_DEFAULT_PREFIX} +export HADOOP_PREFIX + +#check to see if the conf dir is given as an optional argument +if [ $# -gt 1 ] +then + if [ "--config" = "$1" ] + then + shift + confdir=$1 + if [ ! -d "$confdir" ]; then + echo "Error: Cannot find configuration directory: $confdir" + exit 1 + fi + shift + HADOOP_CONF_DIR=$confdir + fi +fi + +# Allow alternate conf dir location. +if [ -e "${HADOOP_PREFIX}/conf/hadoop-env.sh" ]; then + DEFAULT_CONF_DIR="conf" +else + DEFAULT_CONF_DIR="etc/hadoop" +fi + +export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_PREFIX/$DEFAULT_CONF_DIR}" + +# User can specify hostnames or a file where the hostnames are (not both) +if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then + echo \ + "Error: Please specify one variable HADOOP_SLAVES or " \ + "HADOOP_SLAVE_NAME and not both." + exit 1 +fi + +# Process command line options that specify hosts or file with host +# list +if [ $# -gt 1 ] +then + if [ "--hosts" = "$1" ] + then + shift + export HADOOP_SLAVES="${HADOOP_CONF_DIR}/$$1" + shift + elif [ "--hostnames" = "$1" ] + then + shift + export HADOOP_SLAVE_NAMES=$1 + shift + fi +fi + +# User can specify hostnames or a file where the hostnames are (not both) +# (same check as above but now we know it's command line options that cause +# the problem) +if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then + echo \ + "Error: Please specify one of --hosts or --hostnames options and not both." + exit 1 +fi + +if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then + . "${HADOOP_CONF_DIR}/hadoop-env.sh" +fi + +# check if net.ipv6.bindv6only is set to 1 +bindv6only=$(/sbin/sysctl -n net.ipv6.bindv6only 2> /dev/null) +if [ -n "$bindv6only" ] && [ "$bindv6only" -eq "1" ] && [ "$HADOOP_ALLOW_IPV6" != "yes" ] +then + echo "Error: \"net.ipv6.bindv6only\" is set to 1 - Java networking could be broken" + echo "For more info: http://wiki.apache.org/hadoop/HadoopIPv6" + exit 1 +fi + +# Newer versions of glibc use an arena memory allocator that causes virtual +# memory usage to explode. This interacts badly with the many threads that +# we use in Hadoop. Tune the variable down to prevent vmem explosion. +export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4} + +# Attempt to set JAVA_HOME if it is not set +if [[ -z $JAVA_HOME ]]; then + # On OSX use java_home (or /Library for older versions) + if [ "Darwin" == "$(uname -s)" ]; then + if [ -x /usr/libexec/java_home ]; then + export JAVA_HOME=($(/usr/libexec/java_home)) + else + export JAVA_HOME=(/Library/Java/Home) + fi + fi + + # Bail if we did not detect it + if [[ -z $JAVA_HOME ]]; then + echo "Error: JAVA_HOME is not set and could not be found." 
1>&2 + exit 1 + fi +fi + +JAVA=$JAVA_HOME/bin/java +# some Java parameters +JAVA_HEAP_MAX=-Xmx1000m + +# check envvars which might override default args +if [ "$HADOOP_HEAPSIZE" != "" ]; then + #echo "run with heapsize $HADOOP_HEAPSIZE" + JAVA_HEAP_MAX="-Xmx""$HADOOP_HEAPSIZE""m" + #echo $JAVA_HEAP_MAX +fi + +# CLASSPATH initially contains $HADOOP_CONF_DIR +CLASSPATH="${HADOOP_CONF_DIR}" + +# so that filenames w/ spaces are handled correctly in loops below +IFS= + +if [ "$HADOOP_COMMON_HOME" = "" ]; then + if [ -d "${HADOOP_PREFIX}/$HADOOP_COMMON_DIR" ]; then + export HADOOP_COMMON_HOME=$HADOOP_PREFIX + fi +fi + +# for releases, add core hadoop jar & webapps to CLASSPATH +if [ -d "$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR/webapps" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR +fi + +if [ -d "$HADOOP_COMMON_HOME/$HADOOP_COMMON_LIB_JARS_DIR" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_LIB_JARS_DIR'/*' +fi + +CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR'/*' + +# default log directory & file +if [ "$HADOOP_LOG_DIR" = "" ]; then + HADOOP_LOG_DIR="$HADOOP_PREFIX/logs" +fi +if [ "$HADOOP_LOGFILE" = "" ]; then + HADOOP_LOGFILE='hadoop.log' +fi + +# default policy file for service-level authorization +if [ "$HADOOP_POLICYFILE" = "" ]; then + HADOOP_POLICYFILE="hadoop-policy.xml" +fi + +# restore ordinary behaviour +unset IFS + +# setup 'java.library.path' for native-hadoop code if necessary + +if [ -d "${HADOOP_PREFIX}/build/native" -o -d "${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR" ]; then + + if [ -d "${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR" ]; then + if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then + JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR + else + JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR + fi + fi +fi + +# setup a default TOOL_PATH +TOOL_PATH="${TOOL_PATH:-$HADOOP_PREFIX/share/hadoop/tools/lib/*}" + +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR" +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE" +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_PREFIX" +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING" +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}" +if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then + HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH" + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$JAVA_LIBRARY_PATH +fi +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.policy.file=$HADOOP_POLICYFILE" + +# Disable ipv6 as it can cause issues +HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true" + +# put hdfs in classpath if present +if [ "$HADOOP_HDFS_HOME" = "" ]; then + if [ -d "${HADOOP_PREFIX}/$HDFS_DIR" ]; then + export HADOOP_HDFS_HOME=$HADOOP_PREFIX + fi +fi + +if [ -d "$HADOOP_HDFS_HOME/$HDFS_DIR/webapps" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_DIR +fi + +if [ -d "$HADOOP_HDFS_HOME/$HDFS_LIB_JARS_DIR" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_LIB_JARS_DIR'/*' +fi + +CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_DIR'/*' + +# put yarn in classpath if present +if [ "$HADOOP_YARN_HOME" = "" ]; then + if [ -d "${HADOOP_PREFIX}/$YARN_DIR" ]; then + export HADOOP_YARN_HOME=$HADOOP_PREFIX + fi +fi + +if [ -d "$HADOOP_YARN_HOME/$YARN_DIR/webapps" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_DIR +fi + +if [ -d "$HADOOP_YARN_HOME/$YARN_LIB_JARS_DIR" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_LIB_JARS_DIR'/*' +fi + 
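+# --- Illustrative note (editor's sketch, not part of the upstream script) ---
+# The environment variables consumed by this script can be pre-set before any
+# bin/hadoop, bin/hdfs or bin/yarn command runs. A minimal, hypothetical
+# example; the jar path is an assumption, not something this patch ships:
+#
+#   export HADOOP_HEAPSIZE=2048                  # client JVM gets -Xmx2048m
+#   export HADOOP_CLASSPATH="/opt/myjars/*"      # extra jars appended to CLASSPATH
+#   export HADOOP_USER_CLASSPATH_FIRST=true      # prepend them instead of appending
+#   hadoop classpath                             # print the resulting classpath
+# ---------------------------------------------------------------------------
+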
+CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_DIR'/*' + +# put mapred in classpath if present AND different from YARN +if [ "$HADOOP_MAPRED_HOME" = "" ]; then + if [ -d "${HADOOP_PREFIX}/$MAPRED_DIR" ]; then + export HADOOP_MAPRED_HOME=$HADOOP_PREFIX + fi +fi + +if [ "$HADOOP_MAPRED_HOME/$MAPRED_DIR" != "$HADOOP_YARN_HOME/$YARN_DIR" ] ; then + if [ -d "$HADOOP_MAPRED_HOME/$MAPRED_DIR/webapps" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR + fi + + if [ -d "$HADOOP_MAPRED_HOME/$MAPRED_LIB_JARS_DIR" ]; then + CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_LIB_JARS_DIR'/*' + fi + + CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR'/*' +fi + +# Add the user-specified CLASSPATH via HADOOP_CLASSPATH +# Add it first or last depending on if user has +# set env-var HADOOP_USER_CLASSPATH_FIRST +if [ "$HADOOP_CLASSPATH" != "" ]; then + # Prefix it if its to be preceded + if [ "$HADOOP_USER_CLASSPATH_FIRST" != "" ]; then + CLASSPATH=${HADOOP_CLASSPATH}:${CLASSPATH} + else + CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH} + fi +fi + diff --git a/aarch64/libexec/hdfs-config.cmd b/aarch64/libexec/hdfs-config.cmd new file mode 100755 index 0000000..f3aa733 --- /dev/null +++ b/aarch64/libexec/hdfs-config.cmd @@ -0,0 +1,43 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +@rem included in all the hdfs scripts with source command +@rem should not be executed directly + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +if exist %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd ( + call %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd %* +) else if exist %HADOOP_COMMON_HOME%\libexec\hadoop-config.cmd ( + call %HADOOP_COMMON_HOME%\libexec\hadoop-config.cmd %* +) else if exist %HADOOP_HOME%\libexec\hadoop-config.cmd ( + call %HADOOP_HOME%\libexec\hadoop-config.cmd %* +) else ( + echo Hadoop common not found. +) + +:eof diff --git a/aarch64/libexec/hdfs-config.sh b/aarch64/libexec/hdfs-config.sh new file mode 100755 index 0000000..2aabf53 --- /dev/null +++ b/aarch64/libexec/hdfs-config.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# included in all the hdfs scripts with source command +# should not be executed directly + +bin=`which "$0"` +bin=`dirname "${bin}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then + . ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh +elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then + . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh +elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then + . "$HADOOP_HOME"/libexec/hadoop-config.sh +else + echo "Hadoop common not found." + exit +fi diff --git a/aarch64/libexec/httpfs-config.sh b/aarch64/libexec/httpfs-config.sh new file mode 100755 index 0000000..02e1a71 --- /dev/null +++ b/aarch64/libexec/httpfs-config.sh @@ -0,0 +1,174 @@ +#!/bin/bash +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
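+# --- Illustrative note (editor's sketch, not part of the upstream script) ---
+# This file computes the HttpFS defaults shown further below; any of them can
+# be pre-set in etc/hadoop/httpfs-env.sh, which this script sources. A
+# hypothetical httpfs-env.sh; every value here is an example, not a requirement:
+#
+#   export HTTPFS_HTTP_PORT=14000             # REST endpoint port
+#   export HTTPFS_ADMIN_PORT=14001            # defaults to HTTP port + 1
+#   export HTTPFS_LOG=/var/log/httpfs         # defaults to ${HTTPFS_HOME}/logs
+#   export HTTPFS_TEMP=/var/tmp/httpfs        # defaults to ${HTTPFS_HOME}/temp
+#   export CATALINA_PID=/var/run/httpfs.pid   # defaults to /tmp/httpfs.pid
+# ---------------------------------------------------------------------------
+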
+# + +# resolve links - $0 may be a softlink +PRG="${0}" + +while [ -h "${PRG}" ]; do + ls=`ls -ld "${PRG}"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "${PRG}"`/"$link" + fi +done + +BASEDIR=`dirname ${PRG}` +BASEDIR=`cd ${BASEDIR}/..;pwd` + + +function print() { + if [ "${HTTPFS_SILENT}" != "true" ]; then + echo "$@" + fi +} + +# if HTTPFS_HOME is already set warn it will be ignored +# +if [ "${HTTPFS_HOME}" != "" ]; then + echo "WARNING: current setting of HTTPFS_HOME ignored" +fi + +print + +# setting HTTPFS_HOME to the installation dir, it cannot be changed +# +export HTTPFS_HOME=${BASEDIR} +httpfs_home=${HTTPFS_HOME} +print "Setting HTTPFS_HOME: ${HTTPFS_HOME}" + +# if the installation has a env file, source it +# this is for native packages installations +# +if [ -e "${HTTPFS_HOME}/bin/httpfs-env.sh" ]; then + print "Sourcing: ${HTTPFS_HOME}/bin/httpfs-env.sh" + source ${HTTPFS_HOME}/bin/HTTPFS-env.sh + grep "^ *export " ${HTTPFS_HOME}/bin/httpfs-env.sh | sed 's/ *export/ setting/' +fi + +# verify that the sourced env file didn't change HTTPFS_HOME +# if so, warn and revert +# +if [ "${HTTPFS_HOME}" != "${httpfs_home}" ]; then + print "WARN: HTTPFS_HOME resetting to ''${HTTPFS_HOME}'' ignored" + export HTTPFS_HOME=${httpfs_home} + print " using HTTPFS_HOME: ${HTTPFS_HOME}" +fi + +if [ "${HTTPFS_CONFIG}" = "" ]; then + export HTTPFS_CONFIG=${HTTPFS_HOME}/etc/hadoop + print "Setting HTTPFS_CONFIG: ${HTTPFS_CONFIG}" +else + print "Using HTTPFS_CONFIG: ${HTTPFS_CONFIG}" +fi +httpfs_config=${HTTPFS_CONFIG} + +# if the configuration dir has a env file, source it +# +if [ -e "${HTTPFS_CONFIG}/httpfs-env.sh" ]; then + print "Sourcing: ${HTTPFS_CONFIG}/httpfs-env.sh" + source ${HTTPFS_CONFIG}/httpfs-env.sh + grep "^ *export " ${HTTPFS_CONFIG}/httpfs-env.sh | sed 's/ *export/ setting/' +fi + +# verify that the sourced env file didn't change HTTPFS_HOME +# if so, warn and revert +# +if [ "${HTTPFS_HOME}" != "${httpfs_home}" ]; then + echo "WARN: HTTPFS_HOME resetting to ''${HTTPFS_HOME}'' ignored" + export HTTPFS_HOME=${httpfs_home} +fi + +# verify that the sourced env file didn't change HTTPFS_CONFIG +# if so, warn and revert +# +if [ "${HTTPFS_CONFIG}" != "${httpfs_config}" ]; then + echo "WARN: HTTPFS_CONFIG resetting to ''${HTTPFS_CONFIG}'' ignored" + export HTTPFS_CONFIG=${httpfs_config} +fi + +if [ "${HTTPFS_LOG}" = "" ]; then + export HTTPFS_LOG=${HTTPFS_HOME}/logs + print "Setting HTTPFS_LOG: ${HTTPFS_LOG}" +else + print "Using HTTPFS_LOG: ${HTTPFS_LOG}" +fi + +if [ ! -f ${HTTPFS_LOG} ]; then + mkdir -p ${HTTPFS_LOG} +fi + +if [ "${HTTPFS_TEMP}" = "" ]; then + export HTTPFS_TEMP=${HTTPFS_HOME}/temp + print "Setting HTTPFS_TEMP: ${HTTPFS_TEMP}" +else + print "Using HTTPFS_TEMP: ${HTTPFS_TEMP}" +fi + +if [ ! 
-f ${HTTPFS_TEMP} ]; then + mkdir -p ${HTTPFS_TEMP} +fi + +if [ "${HTTPFS_HTTP_PORT}" = "" ]; then + export HTTPFS_HTTP_PORT=14000 + print "Setting HTTPFS_HTTP_PORT: ${HTTPFS_HTTP_PORT}" +else + print "Using HTTPFS_HTTP_PORT: ${HTTPFS_HTTP_PORT}" +fi + +if [ "${HTTPFS_ADMIN_PORT}" = "" ]; then + export HTTPFS_ADMIN_PORT=`expr $HTTPFS_HTTP_PORT + 1` + print "Setting HTTPFS_ADMIN_PORT: ${HTTPFS_ADMIN_PORT}" +else + print "Using HTTPFS_ADMIN_PORT: ${HTTPFS_ADMIN_PORT}" +fi + +if [ "${HTTPFS_HTTP_HOSTNAME}" = "" ]; then + export HTTPFS_HTTP_HOSTNAME=`hostname -f` + print "Setting HTTPFS_HTTP_HOSTNAME: ${HTTPFS_HTTP_HOSTNAME}" +else + print "Using HTTPFS_HTTP_HOSTNAME: ${HTTPFS_HTTP_HOSTNAME}" +fi + +if [ "${CATALINA_BASE}" = "" ]; then + export CATALINA_BASE=${HTTPFS_HOME}/share/hadoop/httpfs/tomcat + print "Setting CATALINA_BASE: ${CATALINA_BASE}" +else + print "Using CATALINA_BASE: ${CATALINA_BASE}" +fi + +if [ "${HTTPFS_CATALINA_HOME}" = "" ]; then + export HTTPFS_CATALINA_HOME=${CATALINA_BASE} + print "Setting HTTPFS_CATALINA_HOME: ${HTTPFS_CATALINA_HOME}" +else + print "Using HTTPFS_CATALINA_HOME: ${HTTPFS_CATALINA_HOME}" +fi + +if [ "${CATALINA_OUT}" = "" ]; then + export CATALINA_OUT=${HTTPFS_LOG}/httpfs-catalina.out + print "Setting CATALINA_OUT: ${CATALINA_OUT}" +else + print "Using CATALINA_OUT: ${CATALINA_OUT}" +fi + +if [ "${CATALINA_PID}" = "" ]; then + export CATALINA_PID=/tmp/httpfs.pid + print "Setting CATALINA_PID: ${CATALINA_PID}" +else + print "Using CATALINA_PID: ${CATALINA_PID}" +fi + +print diff --git a/aarch64/libexec/mapred-config.cmd b/aarch64/libexec/mapred-config.cmd new file mode 100755 index 0000000..f3aa733 --- /dev/null +++ b/aarch64/libexec/mapred-config.cmd @@ -0,0 +1,43 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +@rem included in all the hdfs scripts with source command +@rem should not be executed directly + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +if exist %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd ( + call %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd %* +) else if exist %HADOOP_COMMON_HOME%\libexec\hadoop-config.cmd ( + call %HADOOP_COMMON_HOME%\libexec\hadoop-config.cmd %* +) else if exist %HADOOP_HOME%\libexec\hadoop-config.cmd ( + call %HADOOP_HOME%\libexec\hadoop-config.cmd %* +) else ( + echo Hadoop common not found. 
+) + +:eof diff --git a/aarch64/libexec/mapred-config.sh b/aarch64/libexec/mapred-config.sh new file mode 100755 index 0000000..254e0a0 --- /dev/null +++ b/aarch64/libexec/mapred-config.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# included in all the mapred scripts with source command +# should not be executed directly + +bin=`which "$0"` +bin=`dirname "${bin}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then + . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" +elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then + . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh +elif [ -e "${HADOOP_COMMON_HOME}/bin/hadoop-config.sh" ]; then + . "$HADOOP_COMMON_HOME"/bin/hadoop-config.sh +elif [ -e "${HADOOP_HOME}/bin/hadoop-config.sh" ]; then + . "$HADOOP_HOME"/bin/hadoop-config.sh +elif [ -e "${HADOOP_MAPRED_HOME}/bin/hadoop-config.sh" ]; then + . "$HADOOP_MAPRED_HOME"/bin/hadoop-config.sh +else + echo "Hadoop common not found." + exit +fi + +# Only set locally to use in HADOOP_OPTS. No need to export. +# The following defaults are useful when somebody directly invokes bin/mapred. +HADOOP_MAPRED_LOG_DIR=${HADOOP_MAPRED_LOG_DIR:-${HADOOP_MAPRED_HOME}/logs} +HADOOP_MAPRED_LOGFILE=${HADOOP_MAPRED_LOGFILE:-hadoop.log} +HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,console} + +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_MAPRED_LOG_DIR" +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_MAPRED_LOGFILE" +export HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_MAPRED_ROOT_LOGGER}" + + diff --git a/aarch64/libexec/yarn-config.cmd b/aarch64/libexec/yarn-config.cmd new file mode 100755 index 0000000..41c1434 --- /dev/null +++ b/aarch64/libexec/yarn-config.cmd @@ -0,0 +1,72 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+ +@rem included in all the hdfs scripts with source command +@rem should not be executed directly + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +if exist %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd ( + call %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd %* +) else if exist %HADOOP_COMMON_HOME%\libexec\hadoop-config.cmd ( + call %HADOOP_COMMON_HOME%\libexec\hadoop-config.cmd %* +) else if exist %HADOOP_HOME%\libexec\hadoop-config.cmd ( + call %HADOOP_HOME%\libexec\hadoop-config.cmd %* +) else ( + echo Hadoop common not found. +) + +@rem +@rem Allow alternate conf dir location. +@rem + +if "%1" == "--config" ( + shift + set YARN_CONF_DIR=%2 + shift +) + +if not defined YARN_CONF_DIR ( + if not defined HADOOP_CONF_DIR ( + set YARN_CONF_DIR=%HADOOP_YARN_HOME%\conf + ) else ( + set YARN_CONF_DIR=%HADOOP_CONF_DIR% + ) +) + +@rem +@rem check to see it is specified whether to use the slaves or the +@rem masters file +@rem + +if "%1" == "--hosts" ( + set YARN_SLAVES=%YARN_CONF_DIR%\%2 + shift + shift +) + +:eof diff --git a/aarch64/libexec/yarn-config.sh b/aarch64/libexec/yarn-config.sh new file mode 100755 index 0000000..3d67801 --- /dev/null +++ b/aarch64/libexec/yarn-config.sh @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# included in all the hadoop scripts with source command +# should not be executable directly +bin=`which "$0"` +bin=`dirname "${bin}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then + . ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh +elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then + . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh +elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then + . "$HADOOP_HOME"/libexec/hadoop-config.sh +else + echo "Hadoop common not found." + exit +fi + +# Same glibc bug that discovered in Hadoop. +# Without this you can see very large vmem settings on containers. +export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4} + +#check to see if the conf dir is given as an optional argument +if [ $# -gt 1 ] +then + if [ "--config" = "$1" ] + then + shift + confdir=$1 + shift + YARN_CONF_DIR=$confdir + fi +fi + +# Allow alternate conf dir location. 
+export YARN_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_YARN_HOME/conf}" + +#check to see it is specified whether to use the slaves or the +# masters file +if [ $# -gt 1 ] +then + if [ "--hosts" = "$1" ] + then + shift + slavesfile=$1 + shift + export YARN_SLAVES="${YARN_CONF_DIR}/$slavesfile" + fi +fi diff --git a/aarch64/sbin/distribute-exclude.sh b/aarch64/sbin/distribute-exclude.sh new file mode 100755 index 0000000..66fc14a --- /dev/null +++ b/aarch64/sbin/distribute-exclude.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# ------------------------------------------------------------------ +# +# The purpose of this script is to distribute the exclude file (see +# "dfs.hosts.exclude" in hdfs-site.xml). +# +# Input of the script is a local exclude file. The exclude file +# will be distributed to all the namenodes. The location on the namenodes +# is determined by the configuration "dfs.hosts.exclude" in hdfs-site.xml +# (this value is read from the local copy of hdfs-site.xml and must be same +# on all the namenodes). +# +# The user running this script needs write permissions on the target +# directory on namenodes. +# +# After this command, run refresh-namenodes.sh so that namenodes start +# using the new exclude file. + +bin=`dirname "$0"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hdfs-config.sh + +if [ "$1" = '' ] ; then + "Error: please specify local exclude file as a first argument" + exit 1 +else + excludeFilenameLocal=$1 +fi + +if [ ! -f "$excludeFilenameLocal" ] ; then + echo "Error: exclude file [$excludeFilenameLocal] does not exist." + exit 1 +fi + +namenodes=$("$HADOOP_PREFIX/bin/hdfs" getconf -namenodes) +excludeFilenameRemote=$("$HADOOP_PREFIX/bin/hdfs" getconf -excludeFile) + +if [ "$excludeFilenameRemote" = '' ] ; then + echo \ + "Error: hdfs getconf -excludeFile returned empty string, " \ + "please setup dfs.hosts.exclude in hdfs-site.xml in local cluster " \ + "configuration and on all namenodes" + exit 1 +fi + +echo "Copying exclude file [$excludeFilenameRemote] to namenodes:" + +for namenode in $namenodes ; do + echo " [$namenode]" + scp "$excludeFilenameLocal" "$namenode:$excludeFilenameRemote" + if [ "$?" != '0' ] ; then errorFlag='1' ; fi +done + +if [ "$errorFlag" = '1' ] ; then + echo "Error: transfer of exclude file failed, see error messages above." + exit 1 +else + echo "Transfer of exclude file to all namenodes succeeded." 
+fi + +# eof diff --git a/aarch64/sbin/hadoop-daemon.sh b/aarch64/sbin/hadoop-daemon.sh new file mode 100755 index 0000000..ece40ef --- /dev/null +++ b/aarch64/sbin/hadoop-daemon.sh @@ -0,0 +1,202 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Runs a Hadoop command as a daemon. +# +# Environment Variables +# +# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_PREFIX}/conf. +# HADOOP_LOG_DIR Where log files are stored. PWD by default. +# HADOOP_MASTER host:path where hadoop code should be rsync'd from +# HADOOP_PID_DIR The pid files are stored. /tmp by default. +# HADOOP_IDENT_STRING A string representing this instance of hadoop. $USER by default +# HADOOP_NICENESS The scheduling priority for daemons. Defaults to 0. +## + +usage="Usage: hadoop-daemon.sh [--config ] [--hosts hostlistfile] [--script script] (start|stop) " + +# if no args specified, show usage +if [ $# -le 1 ]; then + echo $usage + exit 1 +fi + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hadoop-config.sh + +# get arguments + +#default value +hadoopScript="$HADOOP_PREFIX"/bin/hadoop +if [ "--script" = "$1" ] + then + shift + hadoopScript=$1 + shift +fi +startStop=$1 +shift +command=$1 +shift + +hadoop_rotate_log () +{ + log=$1; + num=5; + if [ -n "$2" ]; then + num=$2 + fi + if [ -f "$log" ]; then # rotate logs + while [ $num -gt 1 ]; do + prev=`expr $num - 1` + [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num" + num=$prev + done + mv "$log" "$log.$num"; + fi +} + +if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then + . "${HADOOP_CONF_DIR}/hadoop-env.sh" +fi + +# Determine if we're starting a secure datanode, and if so, redefine appropriate variables +if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then + export HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR + export HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR + export HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER + starting_secure_dn="true" +fi + +if [ "$HADOOP_IDENT_STRING" = "" ]; then + export HADOOP_IDENT_STRING="$USER" +fi + + +# get log directory +if [ "$HADOOP_LOG_DIR" = "" ]; then + export HADOOP_LOG_DIR="$HADOOP_PREFIX/logs" +fi + +if [ ! 
-w "$HADOOP_LOG_DIR" ] ; then + mkdir -p "$HADOOP_LOG_DIR" + chown $HADOOP_IDENT_STRING $HADOOP_LOG_DIR +fi + +if [ "$HADOOP_PID_DIR" = "" ]; then + HADOOP_PID_DIR=/tmp +fi + +# some variables +export HADOOP_LOGFILE=hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.log +export HADOOP_ROOT_LOGGER=${HADOOP_ROOT_LOGGER:-"INFO,RFA"} +export HADOOP_SECURITY_LOGGER=${HADOOP_SECURITY_LOGGER:-"INFO,RFAS"} +export HDFS_AUDIT_LOGGER=${HDFS_AUDIT_LOGGER:-"INFO,NullAppender"} +log=$HADOOP_LOG_DIR/hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.out +pid=$HADOOP_PID_DIR/hadoop-$HADOOP_IDENT_STRING-$command.pid +HADOOP_STOP_TIMEOUT=${HADOOP_STOP_TIMEOUT:-5} + +# Set default scheduling priority +if [ "$HADOOP_NICENESS" = "" ]; then + export HADOOP_NICENESS=0 +fi + +case $startStop in + + (start) + + [ -w "$HADOOP_PID_DIR" ] || mkdir -p "$HADOOP_PID_DIR" + + if [ -f $pid ]; then + if kill -0 `cat $pid` > /dev/null 2>&1; then + echo $command running as process `cat $pid`. Stop it first. + exit 1 + fi + fi + + if [ "$HADOOP_MASTER" != "" ]; then + echo rsync from $HADOOP_MASTER + rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HADOOP_MASTER/ "$HADOOP_PREFIX" + fi + + hadoop_rotate_log $log + echo starting $command, logging to $log + cd "$HADOOP_PREFIX" + case $command in + namenode|secondarynamenode|datanode|journalnode|dfs|dfsadmin|fsck|balancer|zkfc) + if [ -z "$HADOOP_HDFS_HOME" ]; then + hdfsScript="$HADOOP_PREFIX"/bin/hdfs + else + hdfsScript="$HADOOP_HDFS_HOME"/bin/hdfs + fi + nohup nice -n $HADOOP_NICENESS $hdfsScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null & + ;; + (*) + nohup nice -n $HADOOP_NICENESS $hadoopScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null & + ;; + esac + echo $! > $pid + sleep 1 + head "$log" + # capture the ulimit output + if [ "true" = "$starting_secure_dn" ]; then + echo "ulimit -a for secure datanode user $HADOOP_SECURE_DN_USER" >> $log + # capture the ulimit info for the appropriate user + su --shell=/bin/bash $HADOOP_SECURE_DN_USER -c 'ulimit -a' >> $log 2>&1 + else + echo "ulimit -a for user $USER" >> $log + ulimit -a >> $log 2>&1 + fi + sleep 3; + if ! ps -p $! > /dev/null ; then + exit 1 + fi + ;; + + (stop) + + if [ -f $pid ]; then + TARGET_PID=`cat $pid` + if kill -0 $TARGET_PID > /dev/null 2>&1; then + echo stopping $command + kill $TARGET_PID + sleep $HADOOP_STOP_TIMEOUT + if kill -0 $TARGET_PID > /dev/null 2>&1; then + echo "$command did not stop gracefully after $HADOOP_STOP_TIMEOUT seconds: killing with kill -9" + kill -9 $TARGET_PID + fi + else + echo no $command to stop + fi + else + echo no $command to stop + fi + ;; + + (*) + echo $usage + exit 1 + ;; + +esac + + diff --git a/aarch64/sbin/hadoop-daemons.sh b/aarch64/sbin/hadoop-daemons.sh new file mode 100755 index 0000000..181d7ac --- /dev/null +++ b/aarch64/sbin/hadoop-daemons.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Run a Hadoop command on all slave hosts. + +usage="Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..." + +# if no args specified, show usage +if [ $# -le 1 ]; then + echo $usage + exit 1 +fi + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hadoop-config.sh + +exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@" diff --git a/aarch64/sbin/hdfs-config.cmd b/aarch64/sbin/hdfs-config.cmd new file mode 100755 index 0000000..f3aa733 --- /dev/null +++ b/aarch64/sbin/hdfs-config.cmd @@ -0,0 +1,43 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +@rem included in all the hdfs scripts with source command +@rem should not be executed directly + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +if exist %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd ( + call %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd %* +) else if exist %HADOOP_COMMON_HOME%\libexec\hadoop-config.cmd ( + call %HADOOP_COMMON_HOME%\libexec\hadoop-config.cmd %* +) else if exist %HADOOP_HOME%\libexec\hadoop-config.cmd ( + call %HADOOP_HOME%\libexec\hadoop-config.cmd %* +) else ( + echo Hadoop common not found. +) + +:eof diff --git a/aarch64/sbin/hdfs-config.sh b/aarch64/sbin/hdfs-config.sh new file mode 100755 index 0000000..2aabf53 --- /dev/null +++ b/aarch64/sbin/hdfs-config.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# included in all the hdfs scripts with source command +# should not be executed directly + +bin=`which "$0"` +bin=`dirname "${bin}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then + . ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh +elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then + . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh +elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then + . "$HADOOP_HOME"/libexec/hadoop-config.sh +else + echo "Hadoop common not found." + exit +fi diff --git a/aarch64/sbin/httpfs.sh b/aarch64/sbin/httpfs.sh new file mode 100755 index 0000000..c83a143 --- /dev/null +++ b/aarch64/sbin/httpfs.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
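+# --- Illustrative note (editor's sketch, not part of the upstream script) ---
+# Arguments are passed straight through to Tomcat's catalina.sh, so the usual
+# Tomcat verbs apply. Typical invocations, assuming this aarch64 layout:
+#
+#   sbin/httpfs.sh run      # start HttpFS in the foreground
+#   sbin/httpfs.sh start    # start HttpFS as a background daemon
+#   sbin/httpfs.sh stop     # stop it (CATALINA_OPTS is copied to JAVA_OPTS below)
+#
+# Setting HTTPFS_SILENT=true sends the catalina.sh output to /dev/null.
+# ---------------------------------------------------------------------------
+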
+# + +# resolve links - $0 may be a softlink +PRG="${0}" + +while [ -h "${PRG}" ]; do + ls=`ls -ld "${PRG}"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "${PRG}"`/"$link" + fi +done + +BASEDIR=`dirname ${PRG}` +BASEDIR=`cd ${BASEDIR}/..;pwd` + +source ${HADOOP_LIBEXEC_DIR:-${BASEDIR}/libexec}/httpfs-config.sh + +# The Java System property 'httpfs.http.port' it is not used by HttpFS, +# it is used in Tomcat's server.xml configuration file +# +print "Using CATALINA_OPTS: ${CATALINA_OPTS}" + +catalina_opts="-Dhttpfs.home.dir=${HTTPFS_HOME}"; +catalina_opts="${catalina_opts} -Dhttpfs.config.dir=${HTTPFS_CONFIG}"; +catalina_opts="${catalina_opts} -Dhttpfs.log.dir=${HTTPFS_LOG}"; +catalina_opts="${catalina_opts} -Dhttpfs.temp.dir=${HTTPFS_TEMP}"; +catalina_opts="${catalina_opts} -Dhttpfs.admin.port=${HTTPFS_ADMIN_PORT}"; +catalina_opts="${catalina_opts} -Dhttpfs.http.port=${HTTPFS_HTTP_PORT}"; +catalina_opts="${catalina_opts} -Dhttpfs.http.hostname=${HTTPFS_HTTP_HOSTNAME}"; + +print "Adding to CATALINA_OPTS: ${catalina_opts}" + +export CATALINA_OPTS="${CATALINA_OPTS} ${catalina_opts}" + +# A bug in catalina.sh script does not use CATALINA_OPTS for stopping the server +# +if [ "${1}" = "stop" ]; then + export JAVA_OPTS=${CATALINA_OPTS} +fi + +if [ "${HTTPFS_SILENT}" != "true" ]; then + exec ${HTTPFS_CATALINA_HOME}/bin/catalina.sh "$@" +else + exec ${HTTPFS_CATALINA_HOME}/bin/catalina.sh "$@" > /dev/null +fi + diff --git a/aarch64/sbin/mr-jobhistory-daemon.sh b/aarch64/sbin/mr-jobhistory-daemon.sh new file mode 100755 index 0000000..9ef3d45 --- /dev/null +++ b/aarch64/sbin/mr-jobhistory-daemon.sh @@ -0,0 +1,146 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# +# Environment Variables +# +# HADOOP_JHS_LOGGER Hadoop JobSummary logger. +# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_MAPRED_HOME}/conf. +# HADOOP_MAPRED_PID_DIR The pid files are stored. /tmp by default. +# HADOOP_MAPRED_NICENESS The scheduling priority for daemons. Defaults to 0. +## + +usage="Usage: mr-jobhistory-daemon.sh [--config ] (start|stop) " + +# if no args specified, show usage +if [ $# -le 1 ]; then + echo $usage + exit 1 +fi + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +if [ -e ${HADOOP_LIBEXEC_DIR}/mapred-config.sh ]; then + . 
$HADOOP_LIBEXEC_DIR/mapred-config.sh +fi + +# get arguments +startStop=$1 +shift +command=$1 +shift + +hadoop_rotate_log () +{ + log=$1; + num=5; + if [ -n "$2" ]; then + num=$2 + fi + if [ -f "$log" ]; then # rotate logs + while [ $num -gt 1 ]; do + prev=`expr $num - 1` + [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num" + num=$prev + done + mv "$log" "$log.$num"; + fi +} + +if [ "$HADOOP_MAPRED_IDENT_STRING" = "" ]; then + export HADOOP_MAPRED_IDENT_STRING="$USER" +fi + +export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_PREFIX}} +export HADOOP_MAPRED_LOGFILE=mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.log +export HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,RFA} +export HADOOP_JHS_LOGGER=${HADOOP_JHS_LOGGER:-INFO,JSA} + +if [ -f "${HADOOP_CONF_DIR}/mapred-env.sh" ]; then + . "${HADOOP_CONF_DIR}/mapred-env.sh" +fi + +mkdir -p "$HADOOP_MAPRED_LOG_DIR" +chown $HADOOP_MAPRED_IDENT_STRING $HADOOP_MAPRED_LOG_DIR + +if [ "$HADOOP_MAPRED_PID_DIR" = "" ]; then + HADOOP_MAPRED_PID_DIR=/tmp +fi + +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_MAPRED_IDENT_STRING" + +log=$HADOOP_MAPRED_LOG_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.out +pid=$HADOOP_MAPRED_PID_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command.pid + +HADOOP_MAPRED_STOP_TIMEOUT=${HADOOP_MAPRED_STOP_TIMEOUT:-5} + +# Set default scheduling priority +if [ "$HADOOP_MAPRED_NICENESS" = "" ]; then + export HADOOP_MAPRED_NICENESS=0 +fi + +case $startStop in + + (start) + + mkdir -p "$HADOOP_MAPRED_PID_DIR" + + if [ -f $pid ]; then + if kill -0 `cat $pid` > /dev/null 2>&1; then + echo $command running as process `cat $pid`. Stop it first. + exit 1 + fi + fi + + hadoop_rotate_log $log + echo starting $command, logging to $log + cd "$HADOOP_MAPRED_HOME" + nohup nice -n $HADOOP_MAPRED_NICENESS "$HADOOP_MAPRED_HOME"/bin/mapred --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null & + echo $! > $pid + sleep 1; head "$log" + ;; + + (stop) + + if [ -f $pid ]; then + TARGET_PID=`cat $pid` + if kill -0 $TARGET_PID > /dev/null 2>&1; then + echo stopping $command + kill $TARGET_PID + sleep $HADOOP_MAPRED_STOP_TIMEOUT + if kill -0 $TARGET_PID > /dev/null 2>&1; then + echo "$command did not stop gracefully after $HADOOP_MAPRED_STOP_TIMEOUT seconds: killing with kill -9" + kill -9 $TARGET_PID + fi + else + echo no $command to stop + fi + else + echo no $command to stop + fi + ;; + + (*) + echo $usage + exit 1 + ;; + +esac diff --git a/aarch64/sbin/refresh-namenodes.sh b/aarch64/sbin/refresh-namenodes.sh new file mode 100755 index 0000000..d3f6759 --- /dev/null +++ b/aarch64/sbin/refresh-namenodes.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
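+# --- Illustrative note (editor's sketch, not part of the upstream script) ---
+# Typical decommissioning flow combining the two helper scripts in sbin; the
+# exclude-file path is only an example:
+#
+#   sbin/distribute-exclude.sh /tmp/dfs.exclude   # copy the exclude file to all namenodes
+#   sbin/refresh-namenodes.sh                     # make every namenode re-read it
+#
+# Internally this script lists namenodes via "hdfs getconf -nnRpcAddresses"
+# and runs "hdfs dfsadmin -refreshNodes" against each of them, as shown below.
+# ---------------------------------------------------------------------------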
+ + +# ------------------------------------------------------------------ +# This script refreshes all namenodes, it's a simple wrapper +# for dfsadmin to support multiple namenodes. + +bin=`dirname "$0"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hdfs-config.sh + +namenodes=$("$HADOOP_PREFIX/bin/hdfs" getconf -nnRpcAddresses) +if [ "$?" != '0' ] ; then errorFlag='1' ; +else + for namenode in $namenodes ; do + echo "Refreshing namenode [$namenode]" + "$HADOOP_PREFIX/bin/hdfs" dfsadmin -fs hdfs://$namenode -refreshNodes + if [ "$?" != '0' ] ; then errorFlag='1' ; fi + done +fi + +if [ "$errorFlag" = '1' ] ; then + echo "Error: refresh of namenodes failed, see error messages above." + exit 1 +else + echo "Refresh of namenodes done." +fi + + +# eof diff --git a/aarch64/sbin/slaves.sh b/aarch64/sbin/slaves.sh new file mode 100755 index 0000000..016392f --- /dev/null +++ b/aarch64/sbin/slaves.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Run a shell command on all slave hosts. +# +# Environment Variables +# +# HADOOP_SLAVES File naming remote hosts. +# Default is ${HADOOP_CONF_DIR}/slaves. +# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_PREFIX}/conf. +# HADOOP_SLAVE_SLEEP Seconds to sleep between spawning remote commands. +# HADOOP_SSH_OPTS Options passed to ssh when running remote commands. +## + +usage="Usage: slaves.sh [--config confdir] command..." + +# if no args specified, show usage +if [ $# -le 0 ]; then + echo $usage + exit 1 +fi + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hadoop-config.sh + +if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then + . "${HADOOP_CONF_DIR}/hadoop-env.sh" +fi + +# Where to start the script, see hadoop-config.sh +# (it set up the variables based on command line options) +if [ "$HADOOP_SLAVE_NAMES" != '' ] ; then + SLAVE_NAMES=$HADOOP_SLAVE_NAMES +else + SLAVE_FILE=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves} + SLAVE_NAMES=$(cat "$SLAVE_FILE" | sed 's/#.*$//;/^$/d') +fi + +# start the daemons +for slave in $SLAVE_NAMES ; do + ssh $HADOOP_SSH_OPTS $slave $"${@// /\\ }" \ + 2>&1 | sed "s/^/$slave: /" & + if [ "$HADOOP_SLAVE_SLEEP" != "" ]; then + sleep $HADOOP_SLAVE_SLEEP + fi +done + +wait diff --git a/aarch64/sbin/start-all.cmd b/aarch64/sbin/start-all.cmd new file mode 100755 index 0000000..9f65b5d --- /dev/null +++ b/aarch64/sbin/start-all.cmd @@ -0,0 +1,52 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. 
See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +setlocal enabledelayedexpansion + +@rem Start all hadoop daemons. Run this on master node. + +echo This script is Deprecated. Instead use start-dfs.cmd and start-yarn.cmd + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +call %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd %* +if "%1" == "--config" ( + shift + shift +) + +@rem start hdfs daemons if hdfs is present +if exist %HADOOP_HDFS_HOME%\sbin\start-dfs.cmd ( + call %HADOOP_HDFS_HOME%\sbin\start-dfs.cmd --config %HADOOP_CONF_DIR% +) + +@rem start yarn daemons if yarn is present +if exist %HADOOP_YARN_HOME%\sbin\start-yarn.cmd ( + call %HADOOP_YARN_HOME%\sbin\start-yarn.cmd --config %HADOOP_CONF_DIR% +) + +endlocal diff --git a/aarch64/sbin/start-all.sh b/aarch64/sbin/start-all.sh new file mode 100755 index 0000000..3124328 --- /dev/null +++ b/aarch64/sbin/start-all.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Start all hadoop daemons. Run this on master node. + +echo "This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh" + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. 
$HADOOP_LIBEXEC_DIR/hadoop-config.sh + +# start hdfs daemons if hdfs is present +if [ -f "${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh ]; then + "${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh --config $HADOOP_CONF_DIR +fi + +# start yarn daemons if yarn is present +if [ -f "${HADOOP_YARN_HOME}"/sbin/start-yarn.sh ]; then + "${HADOOP_YARN_HOME}"/sbin/start-yarn.sh --config $HADOOP_CONF_DIR +fi diff --git a/aarch64/sbin/start-balancer.sh b/aarch64/sbin/start-balancer.sh new file mode 100755 index 0000000..2c14a59 --- /dev/null +++ b/aarch64/sbin/start-balancer.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hdfs-config.sh + +# Start balancer daemon. + +"$HADOOP_PREFIX"/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs start balancer $@ diff --git a/aarch64/sbin/start-dfs.cmd b/aarch64/sbin/start-dfs.cmd new file mode 100755 index 0000000..9f20e5a --- /dev/null +++ b/aarch64/sbin/start-dfs.cmd @@ -0,0 +1,41 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
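start-balancer.sh above hands its trailing arguments straight through hadoop-daemon.sh to "hdfs ... start balancer", so balancer options can be appended directly. A sketch, assuming HADOOP_PREFIX points at the installed aarch64 tree; -threshold is the balancer's standard utilization option, not something this patch defines:

export HADOOP_PREFIX=/opt/hadoop/aarch64   # assumed install location

# Start the balancer daemon; it moves blocks until datanode disk usage
# is within 5% of the cluster average.
$HADOOP_PREFIX/sbin/start-balancer.sh -threshold 5

# Stop it again once rebalancing is done (stop-balancer.sh appears later
# in this patch).
$HADOOP_PREFIX/sbin/stop-balancer.sh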
+@rem +setlocal enabledelayedexpansion + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +call %HADOOP_LIBEXEC_DIR%\hdfs-config.cmd %* +if "%1" == "--config" ( + shift + shift +) + +start "Apache Hadoop Distribution" hadoop namenode +start "Apache Hadoop Distribution" hadoop datanode + +endlocal diff --git a/aarch64/sbin/start-dfs.sh b/aarch64/sbin/start-dfs.sh new file mode 100755 index 0000000..8cbea16 --- /dev/null +++ b/aarch64/sbin/start-dfs.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Start hadoop dfs daemons. +# Optinally upgrade or rollback dfs state. +# Run this on master node. + +usage="Usage: start-dfs.sh [-upgrade|-rollback] [other options such as -clusterId]" + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hdfs-config.sh + +# get arguments +if [ $# -ge 1 ]; then + nameStartOpt="$1" + shift + case "$nameStartOpt" in + (-upgrade) + ;; + (-rollback) + dataStartOpt="$nameStartOpt" + ;; + (*) + echo $usage + exit 1 + ;; + esac +fi + +#Add other possible options +nameStartOpt="$nameStartOpt $@" + +#--------------------------------------------------------- +# namenodes + +NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes) + +echo "Starting namenodes on [$NAMENODES]" + +"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ + --config "$HADOOP_CONF_DIR" \ + --hostnames "$NAMENODES" \ + --script "$bin/hdfs" start namenode $nameStartOpt + +#--------------------------------------------------------- +# datanodes (using default slaves file) + +if [ -n "$HADOOP_SECURE_DN_USER" ]; then + echo \ + "Attempting to start secure cluster, skipping datanodes. " \ + "Run start-secure-dns.sh as root to complete startup." 
+else + "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ + --config "$HADOOP_CONF_DIR" \ + --script "$bin/hdfs" start datanode $dataStartOpt +fi + +#--------------------------------------------------------- +# secondary namenodes (if any) + +SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null) + +if [ -n "$SECONDARY_NAMENODES" ]; then + echo "Starting secondary namenodes [$SECONDARY_NAMENODES]" + + "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ + --config "$HADOOP_CONF_DIR" \ + --hostnames "$SECONDARY_NAMENODES" \ + --script "$bin/hdfs" start secondarynamenode +fi + +#--------------------------------------------------------- +# quorumjournal nodes (if any) + +SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-) + +case "$SHARED_EDITS_DIR" in +qjournal://*) + JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g') + echo "Starting journal nodes [$JOURNAL_NODES]" + "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ + --config "$HADOOP_CONF_DIR" \ + --hostnames "$JOURNAL_NODES" \ + --script "$bin/hdfs" start journalnode ;; +esac + +#--------------------------------------------------------- +# ZK Failover controllers, if auto-HA is enabled +AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled) +if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then + echo "Starting ZK Failover Controllers on NN hosts [$NAMENODES]" + "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ + --config "$HADOOP_CONF_DIR" \ + --hostnames "$NAMENODES" \ + --script "$bin/hdfs" start zkfc +fi + +# eof diff --git a/aarch64/sbin/start-secure-dns.sh b/aarch64/sbin/start-secure-dns.sh new file mode 100755 index 0000000..7ddf687 --- /dev/null +++ b/aarch64/sbin/start-secure-dns.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Run as root to start secure datanodes in a security-enabled cluster. + +usage="Usage (run as root in order to start secure datanodes): start-secure-dns.sh" + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hdfs-config.sh + +if [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then + "$HADOOP_PREFIX"/sbin/hadoop-daemons.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs start datanode $dataStartOpt +else + echo $usage +fi diff --git a/aarch64/sbin/start-yarn.cmd b/aarch64/sbin/start-yarn.cmd new file mode 100755 index 0000000..989510b --- /dev/null +++ b/aarch64/sbin/start-yarn.cmd @@ -0,0 +1,47 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. 
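Taken together, start-dfs.sh above brings up the HDFS daemons in dependency order (namenodes, datanodes, secondary namenodes, journal nodes, and ZK failover controllers when automatic failover is enabled), and start-secure-dns.sh picks up the datanodes it skips on secure clusters. A usage sketch, with HADOOP_PREFIX as an assumed install location:

export HADOOP_PREFIX=/opt/hadoop/aarch64   # assumed

# Plain start: namenodes, then datanodes, secondary namenodes, journal
# nodes (qjournal:// shared edits), and ZKFCs when
# dfs.ha.automatic-failover.enabled is true.
$HADOOP_PREFIX/sbin/start-dfs.sh

# First start after an HDFS layout upgrade, or rolling one back:
$HADOOP_PREFIX/sbin/start-dfs.sh -upgrade
$HADOOP_PREFIX/sbin/start-dfs.sh -rollback

# With HADOOP_SECURE_DN_USER set, start-dfs.sh skips the datanodes;
# finish the startup as root (the script checks EUID and that variable).
sudo -E $HADOOP_PREFIX/sbin/start-secure-dns.sh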
See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem +setlocal enabledelayedexpansion + +echo starting yarn daemons + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +call %HADOOP_LIBEXEC_DIR%\yarn-config.cmd %* +if "%1" == "--config" ( + shift + shift +) + +@rem start resourceManager +start "Apache Hadoop Distribution" yarn resourcemanager +@rem start nodeManager +start "Apache Hadoop Distribution" yarn nodemanager +@rem start proxyserver +@rem start "Apache Hadoop Distribution" yarn proxyserver + +endlocal diff --git a/aarch64/sbin/start-yarn.sh b/aarch64/sbin/start-yarn.sh new file mode 100755 index 0000000..40b77fb --- /dev/null +++ b/aarch64/sbin/start-yarn.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Start all yarn daemons. Run this on master node. + +echo "starting yarn daemons" + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/yarn-config.sh + +# start resourceManager +"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR start resourcemanager +# start nodeManager +"$bin"/yarn-daemons.sh --config $YARN_CONF_DIR start nodemanager +# start proxyserver +#"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR start proxyserver diff --git a/aarch64/sbin/stop-all.cmd b/aarch64/sbin/stop-all.cmd new file mode 100755 index 0000000..1d22c79 --- /dev/null +++ b/aarch64/sbin/stop-all.cmd @@ -0,0 +1,52 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. 
+@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +setlocal enabledelayedexpansion + +@rem Stop all hadoop daemons. Run this on master node. + +echo This script is Deprecated. Instead use stop-dfs.cmd and stop-yarn.cmd + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +call %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd %* +if "%1" == "--config" ( + shift + shift +) + +@rem stop hdfs daemons if hdfs is present +if exist %HADOOP_HDFS_HOME%\sbin\stop-dfs.cmd ( + call %HADOOP_HDFS_HOME%\sbin\stop-dfs.cmd --config %HADOOP_CONF_DIR% +) + +@rem stop yarn daemons if yarn is present +if exist %HADOOP_YARN_HOME%\sbin\stop-yarn.cmd ( + call %HADOOP_YARN_HOME%\sbin\stop-yarn.cmd --config %HADOOP_CONF_DIR% +) + +endlocal diff --git a/aarch64/sbin/stop-all.sh b/aarch64/sbin/stop-all.sh new file mode 100755 index 0000000..9a2fe98 --- /dev/null +++ b/aarch64/sbin/stop-all.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Stop all hadoop daemons. Run this on master node. + +echo "This script is Deprecated. Instead use stop-dfs.sh and stop-yarn.sh" + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hadoop-config.sh + +# stop hdfs daemons if hdfs is present +if [ -f "${HADOOP_HDFS_HOME}"/sbin/stop-dfs.sh ]; then + "${HADOOP_HDFS_HOME}"/sbin/stop-dfs.sh --config $HADOOP_CONF_DIR +fi + +# stop yarn daemons if yarn is present +if [ -f "${HADOOP_HDFS_HOME}"/sbin/stop-yarn.sh ]; then + "${HADOOP_HDFS_HOME}"/sbin/stop-yarn.sh --config $HADOOP_CONF_DIR +fi diff --git a/aarch64/sbin/stop-balancer.sh b/aarch64/sbin/stop-balancer.sh new file mode 100755 index 0000000..df82456 --- /dev/null +++ b/aarch64/sbin/stop-balancer.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
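One detail in stop-all.sh above: both branches key off ${HADOOP_HDFS_HOME}, including the YARN one, whereas start-all.sh and stop-all.cmd use HADOOP_YARN_HOME for the YARN scripts. A sketch of the YARN branch keyed off HADOOP_YARN_HOME instead, offered as an assumption about the intended behaviour rather than a change this patch makes:

# Mirrors the convention used by start-all.sh; harmless when both
# variables point at the same prefix, as they do in a single-tree
# install like this one.
if [ -f "${HADOOP_YARN_HOME}"/sbin/stop-yarn.sh ]; then
  "${HADOOP_YARN_HOME}"/sbin/stop-yarn.sh --config $HADOOP_CONF_DIR
fi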
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hdfs-config.sh + +# Stop balancer daemon. +# Run this on the machine where the balancer is running + +"$HADOOP_PREFIX"/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs stop balancer diff --git a/aarch64/sbin/stop-dfs.cmd b/aarch64/sbin/stop-dfs.cmd new file mode 100755 index 0000000..f0cf015 --- /dev/null +++ b/aarch64/sbin/stop-dfs.cmd @@ -0,0 +1,41 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem +setlocal enabledelayedexpansion + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +call %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd %* +if "%1" == "--config" ( + shift + shift +) + +Taskkill /FI "WINDOWTITLE eq Apache Hadoop Distribution - hadoop namenode" +Taskkill /FI "WINDOWTITLE eq Apache Hadoop Distribution - hadoop datanode" + +endlocal diff --git a/aarch64/sbin/stop-dfs.sh b/aarch64/sbin/stop-dfs.sh new file mode 100755 index 0000000..6a622fa --- /dev/null +++ b/aarch64/sbin/stop-dfs.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hdfs-config.sh + +#--------------------------------------------------------- +# namenodes + +NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes) + +echo "Stopping namenodes on [$NAMENODES]" + +"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ + --config "$HADOOP_CONF_DIR" \ + --hostnames "$NAMENODES" \ + --script "$bin/hdfs" stop namenode + +#--------------------------------------------------------- +# datanodes (using default slaves file) + +if [ -n "$HADOOP_SECURE_DN_USER" ]; then + echo \ + "Attempting to stop secure cluster, skipping datanodes. " \ + "Run stop-secure-dns.sh as root to complete shutdown." +else + "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ + --config "$HADOOP_CONF_DIR" \ + --script "$bin/hdfs" stop datanode +fi + +#--------------------------------------------------------- +# secondary namenodes (if any) + +SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null) + +if [ -n "$SECONDARY_NAMENODES" ]; then + echo "Stopping secondary namenodes [$SECONDARY_NAMENODES]" + + "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ + --config "$HADOOP_CONF_DIR" \ + --hostnames "$SECONDARY_NAMENODES" \ + --script "$bin/hdfs" stop secondarynamenode +fi + +#--------------------------------------------------------- +# quorumjournal nodes (if any) + +SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-) + +case "$SHARED_EDITS_DIR" in +qjournal://*) + JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g') + echo "Stopping journal nodes [$JOURNAL_NODES]" + "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ + --config "$HADOOP_CONF_DIR" \ + --hostnames "$JOURNAL_NODES" \ + --script "$bin/hdfs" stop journalnode ;; +esac + +#--------------------------------------------------------- +# ZK Failover controllers, if auto-HA is enabled +AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled) +if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then + echo "Stopping ZK Failover Controllers on NN hosts [$NAMENODES]" + "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ + --config "$HADOOP_CONF_DIR" \ + --hostnames "$NAMENODES" \ + --script "$bin/hdfs" stop zkfc +fi +# eof diff --git a/aarch64/sbin/stop-secure-dns.sh b/aarch64/sbin/stop-secure-dns.sh new file mode 100755 index 0000000..fdd47c3 --- /dev/null +++ b/aarch64/sbin/stop-secure-dns.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Run as root to start secure datanodes in a security-enabled cluster. + +usage="Usage (run as root in order to stop secure datanodes): stop-secure-dns.sh" + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/hdfs-config.sh + +if [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then + "$HADOOP_PREFIX"/sbin/hadoop-daemons.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs stop datanode +else + echo $usage +fi diff --git a/aarch64/sbin/stop-yarn.cmd b/aarch64/sbin/stop-yarn.cmd new file mode 100755 index 0000000..0914337 --- /dev/null +++ b/aarch64/sbin/stop-yarn.cmd @@ -0,0 +1,47 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem +setlocal enabledelayedexpansion + +echo stopping yarn daemons + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +call %HADOOP_LIBEXEC_DIR%\yarn-config.cmd %* +if "%1" == "--config" ( + shift + shift +) + +@rem stop resourceManager +Taskkill /FI "WINDOWTITLE eq Apache Hadoop Distribution - yarn resourcemanager" +@rem stop nodeManager +Taskkill /FI "WINDOWTITLE eq Apache Hadoop Distribution - yarn nodemanager" +@rem stop proxy server +Taskkill /FI "WINDOWTITLE eq Apache Hadoop Distribution - yarn proxyserver" + +endlocal diff --git a/aarch64/sbin/stop-yarn.sh b/aarch64/sbin/stop-yarn.sh new file mode 100755 index 0000000..a8498ef --- /dev/null +++ b/aarch64/sbin/stop-yarn.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Stop all yarn daemons. Run this on master node. + +echo "stopping yarn daemons" + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/yarn-config.sh + +# stop resourceManager +"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR stop resourcemanager +# stop nodeManager +"$bin"/yarn-daemons.sh --config $YARN_CONF_DIR stop nodemanager +# stop proxy server +"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR stop proxyserver diff --git a/aarch64/sbin/yarn-daemon.sh b/aarch64/sbin/yarn-daemon.sh new file mode 100755 index 0000000..527ae42 --- /dev/null +++ b/aarch64/sbin/yarn-daemon.sh @@ -0,0 +1,160 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Runs a yarn command as a daemon. +# +# Environment Variables +# +# YARN_CONF_DIR Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf. +# YARN_LOG_DIR Where log files are stored. PWD by default. +# YARN_MASTER host:path where hadoop code should be rsync'd from +# YARN_PID_DIR The pid files are stored. /tmp by default. +# YARN_IDENT_STRING A string representing this instance of hadoop. $USER by default +# YARN_NICENESS The scheduling priority for daemons. Defaults to 0. +## + +usage="Usage: yarn-daemon.sh [--config ] [--hosts hostlistfile] (start|stop) " + +# if no args specified, show usage +if [ $# -le 1 ]; then + echo $usage + exit 1 +fi + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/yarn-config.sh + +# get arguments +startStop=$1 +shift +command=$1 +shift + +hadoop_rotate_log () +{ + log=$1; + num=5; + if [ -n "$2" ]; then + num=$2 + fi + if [ -f "$log" ]; then # rotate logs + while [ $num -gt 1 ]; do + prev=`expr $num - 1` + [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num" + num=$prev + done + mv "$log" "$log.$num"; + fi +} + +if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then + . "${YARN_CONF_DIR}/yarn-env.sh" +fi + +if [ "$YARN_IDENT_STRING" = "" ]; then + export YARN_IDENT_STRING="$USER" +fi + +# get log directory +if [ "$YARN_LOG_DIR" = "" ]; then + export YARN_LOG_DIR="$HADOOP_YARN_HOME/logs" +fi + +if [ ! 
-w "$YARN_LOG_DIR" ] ; then + mkdir -p "$YARN_LOG_DIR" + chown $YARN_IDENT_STRING $YARN_LOG_DIR +fi + +if [ "$YARN_PID_DIR" = "" ]; then + YARN_PID_DIR=/tmp +fi + +# some variables +export YARN_LOGFILE=yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.log +export YARN_ROOT_LOGGER=${YARN_ROOT_LOGGER:-INFO,RFA} +log=$YARN_LOG_DIR/yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.out +pid=$YARN_PID_DIR/yarn-$YARN_IDENT_STRING-$command.pid +YARN_STOP_TIMEOUT=${YARN_STOP_TIMEOUT:-5} + +# Set default scheduling priority +if [ "$YARN_NICENESS" = "" ]; then + export YARN_NICENESS=0 +fi + +case $startStop in + + (start) + + [ -w "$YARN_PID_DIR" ] || mkdir -p "$YARN_PID_DIR" + + if [ -f $pid ]; then + if kill -0 `cat $pid` > /dev/null 2>&1; then + echo $command running as process `cat $pid`. Stop it first. + exit 1 + fi + fi + + if [ "$YARN_MASTER" != "" ]; then + echo rsync from $YARN_MASTER + rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $YARN_MASTER/ "$HADOOP_YARN_HOME" + fi + + hadoop_rotate_log $log + echo starting $command, logging to $log + cd "$HADOOP_YARN_HOME" + nohup nice -n $YARN_NICENESS "$HADOOP_YARN_HOME"/bin/yarn --config $YARN_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null & + echo $! > $pid + sleep 1 + head "$log" + # capture the ulimit output + echo "ulimit -a" >> $log + ulimit -a >> $log 2>&1 + ;; + + (stop) + + if [ -f $pid ]; then + TARGET_PID=`cat $pid` + if kill -0 $TARGET_PID > /dev/null 2>&1; then + echo stopping $command + kill $TARGET_PID + sleep $YARN_STOP_TIMEOUT + if kill -0 $TARGET_PID > /dev/null 2>&1; then + echo "$command did not stop gracefully after $YARN_STOP_TIMEOUT seconds: killing with kill -9" + kill -9 $TARGET_PID + fi + else + echo no $command to stop + fi + else + echo no $command to stop + fi + ;; + + (*) + echo $usage + exit 1 + ;; + +esac + + diff --git a/aarch64/sbin/yarn-daemons.sh b/aarch64/sbin/yarn-daemons.sh new file mode 100755 index 0000000..a7858e4 --- /dev/null +++ b/aarch64/sbin/yarn-daemons.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Run a Yarn command on all slave hosts. + +usage="Usage: yarn-daemons.sh [--config confdir] [--hosts hostlistfile] [start +|stop] command args..." + +# if no args specified, show usage +if [ $# -le 1 ]; then + echo $usage + exit 1 +fi + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. 
$HADOOP_LIBEXEC_DIR/yarn-config.sh + +exec "$bin/slaves.sh" --config $YARN_CONF_DIR cd "$HADOOP_YARN_HOME" \; "$bin/yarn-daemon.sh" --config $YARN_CONF_DIR "$@" + diff --git a/aarch64/share/doc/hadoop/common/CHANGES.txt b/aarch64/share/doc/hadoop/common/CHANGES.txt new file mode 100644 index 0000000..6fefb12 --- /dev/null +++ b/aarch64/share/doc/hadoop/common/CHANGES.txt @@ -0,0 +1,13861 @@ +Hadoop Change Log + +Release 2.2.0 - 2013-10-13 + + INCOMPATIBLE CHANGES + + HADOOP-10020. Disable symlinks temporarily (branch-2.1-beta only change) + (sanjay via suresh) + + NEW FEATURES + + HDFS-4817. Make HDFS advisory caching configurable on a per-file basis. + (Contributed by Colin Patrick McCabe) + + IMPROVEMENTS + + HADOOP-9948. Add a config value to CLITestHelper to skip tests on Windows. + (Chuan Liu via cnauroth) + + HADOOP-9976. Different versions of avro and avro-maven-plugin (Karthik + Kambatla via Sandy Ryza) + + HADOOP-9758. Provide configuration option for FileSystem/FileContext + symlink resolution (Andrew Wang via Colin Patrick McCabe) + + HADOOP-8315. Support SASL-authenticated ZooKeeper in ActiveStandbyElector + (todd) + + OPTIMIZATIONS + + BUG FIXES + + HADOOP-9776. HarFileSystem.listStatus() returns invalid authority if port + number is empty. (Shanyu Zhao via ivanmi) + + HADOOP-9761. ViewFileSystem#rename fails when using DistributedFileSystem. + (Andrew Wang via Colin Patrick McCabe) + + HADOOP-10003. HarFileSystem.listLocatedStatus() fails. + (Jason Dere and suresh via suresh) + + HADOOP-10017. Fix NPE in DFSClient#getDelegationToken when doing Distcp + from a secured cluster to an insecured cluster. (Haohui Mai via jing9) + +Release 2.1.1-beta - 2013-09-23 + + INCOMPATIBLE CHANGES + + HADOOP-9944. Fix RpcRequestHeaderProto.callId to be sint32 rather than + uint32 since ipc.Client.CONNECTION_CONTEXT_CALL_ID is signed (i.e. -3) + (acmurthy) + + NEW FEATURES + + IMPROVEMENTS + + HADOOP-9910. proxy server start and stop documentation wrong + (Andre Kelpe via harsh) + + HADOOP-9787. ShutdownHelper util to shutdown threads and threadpools. + (Karthik Kambatla via Sandy Ryza) + + HADOOP-9803. Add a generic type parameter to RetryInvocationHandler. + (szetszwo) + + HADOOP-9821. ClientId should have getMsb/getLsb methods. + (Tsuyoshi OZAWA via jing9) + + HADOOP-9435. Support building the JNI code against the IBM JVM. + (Tian Hong Wang via Colin Patrick McCabe) + + HADOOP-9355. Abstract symlink tests to use either FileContext or + FileSystem. (Andrew Wang via Colin Patrick McCabe) + + HADOOP-9833 move slf4j to version 1.7.5 (Kousuke Saruta via stevel) + + HADOOP-9672. Upgrade Avro dependency to 1.7.4. (sandy via kihwal) + + HADOOP-8814. Replace string equals "" by String#isEmpty(). + (Brandon Li via suresh) + + HADOOP-9789. Support server advertised kerberos principals (daryn) + + HADOOP-9802. Support Snappy codec on Windows. (cnauroth) + + HADOOP-9879. Move the version info of zookeeper dependencies to + hadoop-project/pom (Karthik Kambatla via Sandy Ryza) + + HADOOP-9886. Turn warning message in RetryInvocationHandler to debug (arpit) + + HADOOP-9906. Move HAZKUtil to o.a.h.util.ZKUtil and make inner-classes + public (Karthik Kambatla via Sandy Ryza) + + HADOOP-9918. Add addIfService to CompositeService (Karthik Kambatla via + Sandy Ryza) + + HADOOP-9945. HAServiceState should have a state for stopped services. + (Karthik Kambatla via atm) + + HADOOP-9962. in order to avoid dependency divergence within Hadoop itself + lets enable DependencyConvergence. 
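Stepping back to yarn-daemon.sh and yarn-daemons.sh above: the first runs a single YARN daemon on the local host with pid-file tracking, log rotation, and a bounded stop; the second fans the same invocation out over slaves.sh. A usage sketch, with the install path as an assumption:

cd /opt/hadoop/aarch64   # assumed install location

# ResourceManager on this host; the pid lands in $YARN_PID_DIR (default
# /tmp) and the .log/.out files in $YARN_LOG_DIR.
sbin/yarn-daemon.sh --config "$PWD/etc/hadoop" start resourcemanager

# NodeManagers on every host listed in etc/hadoop/slaves.
sbin/yarn-daemons.sh --config "$PWD/etc/hadoop" start nodemanager

# Stop waits YARN_STOP_TIMEOUT seconds (default 5) before escalating to kill -9.
YARN_STOP_TIMEOUT=30 sbin/yarn-daemon.sh --config "$PWD/etc/hadoop" stop resourcemanager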
(rvs via tucu) + + HADOOP-9487 Deprecation warnings in Configuration should go to their + own log or otherwise be suppressible (Chu Tong via stevel) + + HADOOP-9669. Reduce the number of byte array creations and copies in + XDR data manipulation. (Haohui Mai via brandonli) + + OPTIMIZATIONS + + BUG FIXES + + HADOOP-9916. Fix race in ipc.Client retry. (Binglin Chang via llu) + + HADOOP-9768. chown and chgrp reject users and groups with spaces on platforms + where spaces are otherwise acceptable. (cnauroth) + + HADOOP-9801. Configuration#writeXml uses platform defaulting encoding, which + may mishandle multi-byte characters. (cnauroth) + + HADOOP-9806 PortmapInterface should check if the procedure is out-of-range + (brandonli) + + HADOOP-9315. Port HADOOP-9249 hadoop-maven-plugins Clover fix to branch-2 to + fix build failures. (Dennis Y via cnauroth) + + HADOOP-9831. Make checknative shell command accessible on Windows. (cnauroth) + + HADOOP-9675. use svn:eol-style native for html to prevent line ending + issues (Colin Patrick McCabe) + + HADOOP-9757. Har metadata cache can grow without limit (Cristina Abad via daryn) + + HADOOP-9858. Remove unused private RawLocalFileSystem#execCommand method from + branch-2. (cnauroth) + + HADOOP-9857. Tests block and sometimes timeout on Windows due to invalid + entropy source. (cnauroth) + + HADOOP-9527. Add symlink support to LocalFileSystem on Windows. + (Arpit Agarwal) + + HADOOP-9381. Document dfs cp -f option. (Keegan Witt, suresh via suresh) + + HADOOP-9868. Server must not advertise kerberos realm. (daryn via kihwal) + + HADOOP-9880. SASL changes from HADOOP-9421 breaks Secure HA NN. (daryn via + jing9) + + HADOOP-9899. Remove the debug message, added by HADOOP-8855, from + KerberosAuthenticator. (szetszwo) + + HADOOP-9894. Race condition in Shell leads to logged error stream handling + exceptions (Arpit Agarwal) + + HADOOP-9774. RawLocalFileSystem.listStatus() return absolute paths when + input path is relative on Windows. (Shanyu Zhao via ivanmi) + + HADOOP-9924. FileUtil.createJarWithClassPath() does not generate relative + classpath correctly. (Shanyu Zhao via ivanmi) + + HADOOP-9932. Improper synchronization in RetryCache. (kihwal) + + HADOOP-9958. Add old constructor back to DelegationTokenInformation to + unbreak downstream builds. (Andrew Wang) + + HADOOP-9960. Upgrade Jersey version to 1.9. (Karthik Kambatla via atm) + + HADOOP-9557. hadoop-client excludes commons-httpclient. (Lohit Vijayarenu via + cnauroth) + + HADOOP-9350. Hadoop not building against Java7 on OSX + (Robert Kanter via stevel) + + HADOOP-9961. versions of a few transitive dependencies diverged between hadoop + subprojects. (rvs via tucu) + + HADOOP-9977. Hadoop services won't start with different keypass and + keystorepass when https is enabled. (cnauroth) + +Release 2.1.0-beta - 2013-08-22 + + INCOMPATIBLE CHANGES + + HADOOP-8886. Remove KFS support. (eli) + + HADOOP-9163. [RPC v9] The rpc msg in ProtobufRpcEngine.proto should be moved out to + avoid an extra copy (Sanjay Radia) + + HADOOP-9151. [RPC v9] Include RPC error info in RpcResponseHeader instead of sending + it separately (sanjay Radia) + + HADOOP-9380. [RPC v9] Add totalLength to rpc response (sanjay Radia) + + HADOOP-9425. [RPC v9] Add error codes to rpc-response (sanjay Radia) + + HADOOP-9194. [RPC v9] RPC support for QoS. (Junping Du via llu) + + HADOOP-9630. [RPC v9] Remove IpcSerializationType. (Junping Du via llu) + + HADOOP-9421. 
[RPC v9] Convert SASL to use ProtoBuf and provide + negotiation capabilities (daryn) + + HADOOP-9688. Add globally unique Client ID to RPC requests. (suresh) + + HADOOP-9683. [RPC v9] Wrap IpcConnectionContext in RPC headers (daryn) + + HADOOP-9698. [RPC v9] Client must honor server's SASL negotiate response (daryn) + + HADOOP-9832. [RPC v9] Add RPC header to client ping (daryn) + + HADOOP-9820. [RPC v9] Wire protocol is insufficient to support multiplexing. (daryn via jitendra) + + NEW FEATURES + + HADOOP-9283. Add support for running the Hadoop client on AIX. (atm) + + HADOOP-8415. Add getDouble() and setDouble() in + org.apache.hadoop.conf.Configuration (Jan van der Lugt via harsh) + + HADOOP-9338. FsShell Copy Commands Should Optionally Preserve File + Attributes. (Nick White via atm) + + HADOOP-8562. Enhancements to support Hadoop on Windows Server and Windows + Azure environments. (See breakdown of tasks below for subtasks and + contributors) + + HADOOP-8469. Make NetworkTopology class pluggable. (Junping Du via + szetszwo) + + HADOOP-8470. Add NetworkTopologyWithNodeGroup, a 4-layer implementation + of NetworkTopology. (Junping Du via szetszwo) + + HADOOP-9763. Extends LightWeightGSet to support eviction of expired + elements. (Tsz Wo (Nicholas) SZE via jing9) + + HADOOP-9762. RetryCache utility for implementing RPC retries. + (Suresh Srinivas via jing9) + + HADOOP-9792. Retry the methods that are tagged @AtMostOnce along + with @Idempotent. (suresh) + + HADOOP-9509. Implement ONCRPC and XDR. (brandonli) + + HADOOP-9515. Add general interface for NFS and Mount. (brandonli) + + IMPROVEMENTS + + HADOOP-9164. Print paths of loaded native libraries in + NativeLibraryChecker. (Binglin Chang via llu) + + HADOOP-9253. Capture ulimit info in the logs at service start time. + (Arpit Gupta via suresh) + + HADOOP-8924. Add maven plugin alternative to shell script to save + package-info.java. (Chris Nauroth via suresh) + + HADOOP-9117. replace protoc ant plugin exec with a maven plugin. (tucu) + + HADOOP-9279. Document the need to build hadoop-maven-plugins for + eclipse and separate project builds. (Tsuyoshi Ozawa via suresh) + + HADOOP-9334. Upgrade netty version. (Nicolas Liochon via suresh) + + HADOOP-9343. Allow additional exceptions through the RPC layer. (sseth) + + HADOOP-9318. When exiting on a signal, print the signal name first. (Colin + Patrick McCabe via atm) + + HADOOP-9358. "Auth failed" log should include exception string (todd) + + HADOOP-9401. CodecPool: Add counters for number of (de)compressors + leased out. (kkambatl via tucu) + + HADOOP-9450. HADOOP_USER_CLASSPATH_FIRST is not honored; CLASSPATH + is PREpended instead of APpended. (Chris Nauroth and harsh via harsh) + + HADOOP-9496. Bad merge of HADOOP-9450 on branch-2 breaks all bin/hadoop + calls that need HADOOP_CLASSPATH. (harsh) + + HADOOP-9503. Remove sleep between IPC client connect timeouts. + (Varun Sharma via szetszwo) + + HADOOP-9322. LdapGroupsMapping doesn't seem to set a timeout for + its directory search. (harsh) + + HADOOP-9523. Provide a generic IBM java vendor flag in PlatformName.java + to support non-Sun JREs. (Tian Hong Wang via suresh) + + HADOOP-9511. Adding support for additional input streams (FSDataInputStream + and RandomAccessFile) in SecureIOUtils so as to help YARN-578. (Omkar Vinit + Joshi via vinodkv) + + HADOOP-9560. metrics2#JvmMetrics should have max memory size of JVM. 
+ (Tsuyoshi Ozawa via suresh) + + HADOOP-9140 Cleanup rpc PB protos (sanjay Radia) + + HADOOP-9218 Document the Rpc-wrappers used internally (sanjay Radia) + + HADOOP-9574. Added new methods in AbstractDelegationTokenSecretManager for + helping YARN ResourceManager to reuse code for RM restart. (Jian He via + vinodkv) + + HADOOP-7391 Document Interface Classification from HADOOP-5073 (sanjay Radia) + + HADOOP-9287. Parallel-testing hadoop-common (Andrey Klochkov via jlowe) + + HADOOP-9604. Javadoc of FSDataOutputStream is slightly inaccurate. (Jingguo + Yao via atm) + + HADOOP-9625. HADOOP_OPTS not picked up by hadoop command. + (Paul Han via arpit) + + HADOOP-9649. Promoted YARN service life-cycle libraries into Hadoop Common + for usage across all Hadoop projects. (Zhijie Shen via vinodkv) + + HADOOP-9517. Documented various aspects of compatibility for Apache + Hadoop. (Karthik Kambatla via acmurthy) + + HADOOP-8608. Add Configuration API for parsing time durations. (cdouglas) + + HADOOP-9619 Mark stability of .proto files (sanjay Radia) + + HADOOP-9676. Make maximum RPC buffer size configurable (Colin Patrick + McCabe) + + HADOOP-9691. RPC clients can generate call ID using AtomicInteger instead of + synchronizing on the Client instance. (cnauroth) + + HADOOP-9661. Allow metrics sources to be extended. (sandyr via tucu) + + HADOOP-9370. Write FSWrapper class to wrap FileSystem and FileContext for + better test coverage. (Andrew Wang via Colin Patrick McCabe) + + HADOOP-9673. NetworkTopology: when a node can't be added, print out its + location for diagnostic purposes. (Colin Patrick McCabe) + + HADOOP-9414. Refactor out FSLinkResolver and relevant helper methods. + (Andrew Wang via Colin Patrick McCabe) + + HADOOP-9416. Add new symlink resolution methods in FileSystem and + FileSystemLinkResolver. (Andrew Wang via Colin Patrick McCabe) + + HADOOP-9720. Rename Client#uuid to Client#clientId. (Arpit Agarwal via + suresh) + + HADOOP-9734. Common protobuf definitions for GetUserMappingsProtocol, + RefreshAuthorizationPolicyProtocol and RefreshUserMappingsProtocol (jlowe) + + HADOOP-9716. Rpc retries should use the same call ID as the original call. + (szetszwo) + + HADOOP-9717. Add retry attempt count to the RPC requests. (jing9) + + HADOOP-9751. Add clientId and retryCount to RpcResponseHeaderProto. + (szetszwo) + + HADOOP-9754. Remove unnecessary "throws IOException/InterruptedException", + and fix generic and other javac warnings. (szetszwo) + + HADOOP-9760. Move GSet and related classes to common from HDFS. + (suresh) + + HADOOP-9756. Remove the deprecated getServer(..) methods from RPC. + (Junping Du via szetszwo) + + HADOOP-9770. Make RetryCache#state non volatile. (suresh) + + HADOOP-9786. RetryInvocationHandler#isRpcInvocation should support + ProtocolTranslator. (suresh and jing9) + + OPTIMIZATIONS + + HADOOP-9150. Avoid unnecessary DNS resolution attempts for logical URIs + (todd) + + HADOOP-9845. Update protobuf to 2.5 from 2.4.x. (tucu) + + HADOOP-9872. Improve protoc version handling and detection. (tucu) + + BUG FIXES + + HADOOP-9451. Fault single-layer config if node group topology is enabled. + (Junping Du via llu) + + HADOOP-9294. GetGroupsTestBase fails on Windows. (Chris Nauroth via suresh) + + HADOOP-9305. Add support for running the Hadoop client on 64-bit AIX. (atm) + + HADOOP-9245. mvn clean without running mvn install before fails. 
+ (Karthik Kambatla via suresh) + + HADOOP-9246 Execution phase for hadoop-maven-plugin should be + process-resources (Karthik Kambatla and Chris Nauroth via jlowe) + + HADOOP-9297. remove old record IO generation and tests. (tucu) + + HADOOP-9154. SortedMapWritable#putAll() doesn't add key/value classes to + the map. (Karthik Kambatla via tomwhite) + + HADOOP-9304. remove addition of avro genreated-sources dirs to build. (tucu) + + HADOOP-9267. hadoop -help, -h, --help should show usage instructions. + (Andrew Wang via atm) + + HADOOP-8569. CMakeLists.txt: define _GNU_SOURCE and _LARGEFILE_SOURCE. + (Colin Patrick McCabe via atm) + + HADOOP-9323. Fix typos in API documentation. (suresh) + + HADOOP-7487. DF should throw a more reasonable exception when mount cannot + be determined. (Andrew Wang via atm) + + HADOOP-8917. add LOCALE.US to toLowerCase in SecurityUtil.replacePattern. + (Arpit Gupta via suresh) + + HADOOP-9342. Remove jline from distribution. (thw via tucu) + + HADOOP-9230. TestUniformSizeInputFormat fails intermittently. + (kkambatl via tucu) + + HADOOP-9349. Confusing output when running hadoop version from one hadoop + installation when HADOOP_HOME points to another. (sandyr via tucu) + + HADOOP-9337. org.apache.hadoop.fs.DF.getMount() does not work on Mac OS. + (Ivan A. Veselovsky via atm) + + HADOOP-9369. DNS#reverseDns() can return hostname with . appended at the + end. (Karthik Kambatla via atm) + + HADOOP-9379. capture the ulimit info after printing the log to the + console. (Arpit Gupta via suresh) + + HADOOP-9399. protoc maven plugin doesn't work on mvn 3.0.2 (todd) + + HADOOP-9407. commons-daemon 1.0.3 dependency has bad group id causing + build issues. (Sangjin Lee via suresh) + + HADOOP-9405. TestGridmixSummary#testExecutionSummarizer is broken. (Andrew + Wang via atm) + + HADOOP-9430. TestSSLFactory fails on IBM JVM. (Amir Sanjar via suresh) + + HADOOP-9125. LdapGroupsMapping threw CommunicationException after some + idle time. (Kai Zheng via atm) + + HADOOP-9429. TestConfiguration fails with IBM JAVA. (Amir Sanjar via + suresh) + + HADOOP-9222. Cover package with org.apache.hadoop.io.lz4 unit tests (Vadim + Bondarev via jlowe) + + HADOOP-9233. Cover package org.apache.hadoop.io.compress.zlib with unit + tests (Vadim Bondarev via jlowe) + + HADOOP-9211. Set default max heap size in HADOOP_CLIENT_OPTS to 512m + in order to avoid OOME. (Plamen Jeliazkov via shv) + + HADOOP-9473. Typo in FileUtil copy() method. (Glen Mazza via suresh) + + HADOOP-9504. MetricsDynamicMBeanBase has concurrency issues in + createMBeanInfo (Liang Xie via jlowe) + + HADOOP-9455. HADOOP_CLIENT_OPTS appended twice causes JVM failures. + (Chris Nauroth via suresh) + + HADOOP-9550. Remove aspectj dependency. (kkambatl via tucu) + + HADOOP-9549. WebHdfsFileSystem hangs on close(). (daryn via kihwal) + + HADOOP-9485. No default value in the code for + hadoop.rpc.socket.factory.class.default. (Colin Patrick McCabe via atm) + + HADOOP-9459. ActiveStandbyElector can join election even before + Service HEALTHY, and results in null data at ActiveBreadCrumb. + (Vinay and todd via todd) + + HADOOP-9307. BufferedFSInputStream.read returns wrong results + after certain seeks. (todd) + + HADOOP-9220. Unnecessary transition to standby in ActiveStandbyElector. + (tom and todd via todd) + + HADOOP-9563. Fix incompatibility introduced by HADOOP-9523. + (Tian Hong Wang via suresh) + + HADOOP-9566. 
Performing direct read using libhdfs sometimes raises SIGPIPE + (which in turn throws SIGABRT) causing client crashes. (Colin Patrick + McCabe via atm) + + HADOOP-9481. Broken conditional logic with HADOOP_SNAPPY_LIBRARY. (Vadim + Bondarev via atm) + + HADOOP-9593. stack trace printed at ERROR for all yarn clients without + hadoop.home set (stevel) + + HADOOP-8957. AbstractFileSystem#IsValidName should be overridden for + embedded file systems like ViewFs (Chris Nauroth via Sanjay Radia) + + HADOOP-9607. Fixes in Javadoc build (Timothy St. Clair via cos) + + HADOOP-9605. Update junit dependency. (Timothy St. Clair via cos) + + HADOOP-9581. hadoop --config non-existent directory should result in error + (Ashwin Shankar via jlowe) + + HADOOP-9638. Parallel test changes caused invalid test path for several HDFS + tests on Windows (Andrey Klochkov via cnauroth) + + HADOOP-9632. TestShellCommandFencer will fail if there is a 'host' machine in + the network. (Chuan Liu via cnauroth) + + HADOOP-9624. TestFSMainOperationsLocalFileSystem failed when the Hadoop test + root path has "X" in its name. (Xi Fang via cnauroth) + + HADOOP-9439. JniBasedUnixGroupsMapping: fix some crash bugs. (Colin + Patrick McCabe) + + HADOOP-9656. Gridmix unit tests fail on Windows and Linux. (Chuan Liu via + cnauroth) + + HADOOP-9707. Fix register lists for crc32c inline assembly. (todd via + kihwal) + + HADOOP-9738. TestDistCh fails. (jing9 via kihwal) + + HADOOP-9759. Add support for NativeCodeLoader#getLibraryName on Windows. + (Chuan Liu via cnauroth) + + HADOOP-9773. TestLightWeightCache should not set size limit to zero when + testing it. (szetszwo) + + HADOOP-9507. LocalFileSystem rename() is broken in some cases when + destination exists. (cnauroth) + + HADOOP-9816. RPC Sasl QOP is broken (daryn) + + HADOOP-9850. RPC kerberos errors don't trigger relogin. (daryn via kihwal) + + BREAKDOWN OF HADOOP-8562 SUBTASKS AND RELATED JIRAS + + HADOOP-8924. Hadoop Common creating package-info.java must not depend on + sh. (Chris Nauroth via suresh) + + HADOOP-8945. Merge winutils from branch-1-win to branch-trunk-win. + (Bikas Saha, Chuan Liu, Giridharan Kesavan, Ivan Mitic, and Steve Maine + ported by Chris Nauroth via suresh) + + HADOOP-8946. winutils: compile codebase during Maven build on + branch-trunk-win. (Chris Nauroth via suresh) + + HADOOP-8947. Merge FileUtil and Shell changes from branch-1-win to + branch-trunk-win to enable initial test pass. (Raja Aluri, Davio Lao, + Sumadhur Reddy Bolli, Ahmed El Baz, Kanna Karanam, Chuan Liu, + Ivan Mitic, Chris Nauroth, and Bikas Saha via suresh) + + HADOOP-8954. "stat" executable not found on Windows. (Bikas Saha, Ivan Mitic + ported by Chris Narouth via suresh) + + HADOOP-8959. TestUserGroupInformation fails on Windows due to "id" executable + not found. (Bikas Saha, Ivan Mitic, ported by Chris Narouth via suresh) + + HADOOP-8955. "chmod" executable not found on Windows. + (Chris Nauroth via suresh) + + HADOOP-8960. TestMetricsServlet fails on Windows. (Ivan Mitic via suresh) + + HADOOP-8961. GenericOptionsParser URI parsing failure on Windows. + (Ivan Mitic via suresh) + + HADOOP-8949. Remove FileUtil.CygPathCommand dead code. (Chris Nauroth via + suresh) + + HADOOP-8956. FileSystem.primitiveMkdir failures on Windows cause multiple + test suites to fail. (Chris Nauroth via suresh) + + HADOOP-8978. TestTrash fails on Windows. (Chris Nauroth via suresh) + + HADOOP-8979. TestHttpServer fails on Windows. (Chris Nauroth via suresh) + + HADOOP-8953. 
Shell PathData parsing failures on Windows. (Arpit Agarwal via + suresh) + + HADOOP-8975. TestFileContextResolveAfs fails on Windows. (Chris Nauroth via + suresh) + + HADOOP-8977. Multiple FsShell test failures on Windows. (Chris Nauroth via + suresh) + + HADOOP-9005. Merge hadoop cmd line scripts from branch-1-win. (David Lao, + Bikas Saha, Lauren Yang, Chuan Liu, Thejas M Nair and Ivan Mitic via suresh) + + HADOOP-9008. Building hadoop tarball fails on Windows. (Chris Nauroth via + suresh) + + HADOOP-9011. saveVersion.py does not include branch in version annotation. + (Chris Nauroth via suresh) + + HADOOP-9110. winutils ls off-by-one error indexing MONTHS array can cause + access violation. (Chris Nauroth via suresh) + + HADOOP-9056. Build native library on Windows. (Chuan Liu, Arpit Agarwal via + suresh) + + HADOOP-9144. Fix findbugs warnings. (Chris Nauroth via suresh) + + HADOOP-9081. Add TestWinUtils. (Chuan Liu, Ivan Mitic, Chris Nauroth, + and Bikas Saha via suresh) + + HADOOP-9146. Fix sticky bit regression on branch-trunk-win. + (Chris Nauroth via suresh) + + HADOOP-9266. Fix javac, findbugs, and release audit warnings on + branch-trunk-win. (Chris Nauroth via suresh) + + HADOOP-9270. Remove a stale java comment from FileUtil. (Chris Nauroth via + szetszwo) + + HADOOP-9271. Revert Python build scripts from branch-trunk-win. + (Chris Nauroth via suresh) + + HADOOP-9313. Remove spurious mkdir from hadoop-config.cmd. + (Ivan Mitic via suresh) + + HADOOP-9309. Test failures on Windows due to UnsatisfiedLinkError + in NativeCodeLoader#buildSupportsSnappy. (Arpit Agarwal via suresh) + + HADOOP-9347. Add instructions to BUILDING.txt describing how to + build on Windows. (Chris Nauroth via suresh) + + HADOOP-9348. Address TODO in winutils to add more command line usage + and examples. (Chris Nauroth via suresh) + + HADOOP-9354. Windows native project files missing license headers. + (Chris Nauroth via suresh) + + HADOOP-9356. Remove remaining references to cygwin/cygpath from scripts. + (Chris Nauroth via suresh) + + HADOOP-9232. JniBasedUnixGroupsMappingWithFallback fails on Windows + with UnsatisfiedLinkError. (Ivan Mitic via suresh) + + HADOOP-9368. Add timeouts to new tests in branch-trunk-win. + (Arpit Agarwal via suresh) + + HADOOP-9373. Merge CHANGES.branch-trunk-win.txt to CHANGES.txt. + (suresh) + + HADOOP-9372. Fix bad timeout annotations on tests. + (Arpit Agarwal via suresh) + + HADOOP-9376. TestProxyUserFromEnv fails on a Windows domain joined machine. + (Ivan Mitic via suresh) + + HADOOP-9365. TestHAZKUtil fails on Windows. (Ivan Mitic via suresh) + + HADOOP-9364. PathData#expandAsGlob does not return correct results for + absolute paths on Windows. (Ivan Mitic via suresh) + + HADOOP-8973. DiskChecker cannot reliably detect an inaccessible disk on + Windows with NTFS ACLs. (Chris Nauroth via suresh) + + HADOOP-9388. TestFsShellCopy fails on Windows. (Ivan Mitic via suresh) + + HADOOP-9387. Fix DF so that it won't execute a shell command on Windows + to compute the file system/mount point. (Ivan Mitic via szetszwo) + + HADOOP-9353. Activate native-win maven profile by default on Windows. + (Arpit Agarwal via szetszwo) + + HADOOP-9437. TestNativeIO#testRenameTo fails on Windows due to assumption + that POSIX errno is embedded in NativeIOException. (Chris Nauroth via + suresh) + + HADOOP-9443. Port winutils static code analysis change to trunk. + (Chuan Liu via suresh) + + HADOOP-9290. Some tests cannot load native library on windows. 
+ (Chris Nauroth via suresh) + + HADOOP-9500. TestUserGroupInformation#testGetServerSideGroups fails on + Windows due to failure to find winutils.exe. (Chris Nauroth via suresh) + + HADOOP-9490. LocalFileSystem#reportChecksumFailure not closing the + checksum file handle before rename. (Ivan Mitic via suresh) + + HADOOP-9524. Fix ShellCommandFencer to work on Windows. + (Arpit Agarwal via suresh) + + HADOOP-9413. Add common utils for File#setReadable/Writable/Executable & + File#canRead/Write/Execute that work cross-platform. (Ivan Mitic via suresh) + + HADOOP-9532. HADOOP_CLIENT_OPTS is appended twice by Windows cmd scripts. + (Chris Nauroth via suresh) + + HADOOP-9043. Disallow in winutils creating symlinks with forwards slashes. + (Chris Nauroth and Arpit Agarwal via suresh) + + HADOOP-9483. winutils support for readlink command. + (Arpit Agarwal via suresh) + + HADOOP-9488. FileUtil#createJarWithClassPath only substitutes environment + variables from current process environment/does not support overriding + when launching new process (Chris Nauroth via bikas) + + HADOOP-9556. disable HA tests on Windows that fail due to ZooKeeper client + connection management bug. (Chris Nauroth via suresh) + + HADOOP-9553. TestAuthenticationToken fails on Windows. + (Arpit Agarwal via suresh) + + HADOOP-9397. Incremental dist tar build fails. (Chris Nauroth via jlowe) + + HADOOP-9131. Turn off TestLocalFileSystem#testListStatusWithColons on + Windows. (Chris Nauroth via suresh) + + HADOOP-9526. TestShellCommandFencer and TestShell fail on Windows. + (Arpit Agarwal via suresh) + + HADOOP-8982. TestSocketIOWithTimeout fails on Windows. + (Chris Nauroth via suresh) + + HADOOP-8958. ViewFs:Non absolute mount name failures when running + multiple tests on Windows. (Chris Nauroth via suresh) + + HADOOP-9599. hadoop-config.cmd doesn't set JAVA_LIBRARY_PATH correctly. + (Mostafa Elhemali via ivanmi) + + HADOOP-9637. Adding Native Fstat for Windows as needed by YARN. (Chuan Liu + via cnauroth) + + HADOOP-9264. Port change to use Java untar API on Windows from + branch-1-win to trunk. (Chris Nauroth via suresh) + + HADOOP-9678. TestRPC#testStopsAllThreads intermittently fails on Windows. + (Ivan Mitic via cnauroth) + + HADOOP-9681. FileUtil.unTarUsingJava() should close the InputStream upon + finishing. (Chuan Liu via cnauroth) + + HADOOP-9665. Fixed BlockDecompressorStream#decompress to return -1 rather + than throw EOF at end of file. (Zhijie Shen via acmurthy) + + HADOOP-8440. HarFileSystem.decodeHarURI fails for URIs whose host contains + numbers. (Ivan Mitic via cnauroth) + + HADOOP-9643. org.apache.hadoop.security.SecurityUtil calls + toUpperCase(Locale.getDefault()) as well as toLowerCase(Locale.getDefault()) + on hadoop.security.authentication value. (markrmiller@gmail.com via tucu) + + HADOOP-9701. mvn site ambiguous links in hadoop-common. (kkambatl via tucu) + +Release 2.0.5-alpha - 06/06/2013 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + HADOOP-9407. commons-daemon 1.0.3 dependency has bad group id causing + build issues. (Sangjin Lee via suresh) + +Release 2.0.4-alpha - 2013-04-25 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + HADOOP-9467. Metrics2 record filter should check name as well as tags. + (Chris Nauroth and Ganeshan Iyler via llu) + + HADOOP-9406. hadoop-client leaks dependency on JDK tools jar. (tucu) + + HADOOP-9301. 
hadoop client servlet/jsp/jetty/tomcat JARs creating + conflicts in Oozie & HttpFS. (tucu) + + HADOOP-9299. kerberos name resolution is kicking in even when kerberos + is not configured (daryn) + + HADOOP-9408. misleading description for net.topology.table.file.name + property in core-default.xml. (rajeshbabu via suresh) + + HADOOP-9444. Modify hadoop-policy.xml to replace unexpanded variables to a + default value of '*'. (Roman Shaposhnik via vinodkv) + + HADOOP-9471. hadoop-client wrongfully excludes jetty-util JAR, + breaking webhdfs. (tucu) + +Release 2.0.3-alpha - 2013-02-06 + + INCOMPATIBLE CHANGES + + HADOOP-8999. SASL negotiation is flawed (daryn) + + NEW FEATURES + + HADOOP-8561. Introduce HADOOP_PROXY_USER for secure impersonation in child + hadoop client processes. (Yu Gao via llu) + + HADOOP-8597. Permit FsShell's text command to read Avro files. + (Ivan Vladimirov Ivanov via cutting) + + HADOOP-9020. Add a SASL PLAIN server (daryn via bobby) + + HADOOP-9090. Support on-demand publish of metrics. (Mostafa Elhemali via + suresh) + + HADOOP-9054. Add AuthenticationHandler that uses Kerberos but allows for + an alternate form of authentication for browsers. (rkanter via tucu) + + IMPROVEMENTS + + HADOOP-8789. Tests setLevel(Level.OFF) should be Level.ERROR. + (Andy Isaacson via eli) + + HADOOP-8755. Print thread dump when tests fail due to timeout. (Andrey + Klochkov via atm) + + HADOOP-8806. libhadoop.so: dlopen should be better at locating + libsnappy.so, etc. (Colin Patrick McCabe via eli) + + HADOOP-8812. ExitUtil#terminate should print Exception#toString. (eli) + + HADOOP-8736. Add Builder for building RPC server. (Brandon Li via Suresh) + + HDFS-3957. Change MutableQuantiles to use a shared thread for rolling + over metrics. (Andrew Wang via todd) + + HADOOP-8851. Use -XX:+HeapDumpOnOutOfMemoryError JVM option in the forked + tests. (Ivan A. Veselovsky via atm) + + HADOOP-8783. Improve RPC.Server's digest auth (daryn) + + HADOOP-8889. Upgrade to Surefire 2.12.3 (todd) + + HADOOP-8804. Improve Web UIs when the wildcard address is used. + (Senthil Kumar via eli) + + HADOOP-8894. GenericTestUtils.waitFor should dump thread stacks on timeout + (todd) + + HADOOP-8909. Hadoop Common Maven protoc calls must not depend on external + sh script. (Chris Nauroth via suresh) + + HADOOP-8911. CRLF characters in source and text files. + (Raja Aluri via suresh) + + HADOOP-8912. Add .gitattributes file to prevent CRLF and LF mismatches + for source and text files. (Raja Aluri via suresh) + + HADOOP-8784. Improve IPC.Client's token use (daryn) + + HADOOP-8929. Add toString, other improvements for SampleQuantiles (todd) + + HADOOP-8922. Provide alternate JSONP output for JMXJsonServlet to allow + javascript in browser (Damien Hardy via bobby) + + HADOOP-8931. Add Java version to startup message. (eli) + + HADOOP-8925. Remove the packaging. (eli) + + HADOOP-8985. Add namespace declarations in .proto files for languages + other than java. (Binglin Chan via suresh) + + HADOOP-9009. Add SecurityUtil methods to get/set authentication method + (daryn via bobby) + + HADOOP-9010. Map UGI authenticationMethod to RPC authMethod (daryn via + bobby) + + HADOOP-9013. UGI should not hardcode loginUser's authenticationType (daryn + via bobby) + + HADOOP-9014. Standardize creation of SaslRpcClients (daryn via bobby) + + HADOOP-9015. Standardize creation of SaslRpcServers (daryn via bobby) + + HADOOP-8860. Split MapReduce and YARN sections in documentation navigation. + (tomwhite via tucu) + + HADOOP-9021. 
Enforce configured SASL method on the server (daryn via + bobby) + + HADOOP-8998. set Cache-Control no-cache header on all dynamic content. (tucu) + + HADOOP-9035. Generalize setup of LoginContext (daryn via bobby) + + HADOOP-9093. Move all the Exception in PathExceptions to o.a.h.fs package. + (suresh) + + HADOOP-9042. Add a test for umask in FileSystemContractBaseTest. + (Colin McCabe via eli) + + HADOOP-9127. Update documentation for ZooKeeper Failover Controller. + (Daisuke Kobayashi via atm) + + HADOOP-9004. Allow security unit tests to use external KDC. (Stephen Chu + via suresh) + + HADOOP-9147. Add missing fields to FileStatus.toString. + (Jonathan Allen via suresh) + + HADOOP-8427. Convert Forrest docs to APT, incremental. (adi2 via tucu) + + HADOOP-9162. Add utility to check native library availability. + (Binglin Chang via suresh) + + HADOOP-9173. Add security token protobuf definition to common and + use it in hdfs. (suresh) + + HADOOP-9119. Add test to FileSystemContractBaseTest to verify integrity + of overwritten files. (Steve Loughran via suresh) + + HADOOP-9192. Move token related request/response messages to common. + (suresh) + + HADOOP-8712. Change default hadoop.security.group.mapping to + JniBasedUnixGroupsNetgroupMappingWithFallback (Robert Parker via todd) + + HADOOP-9106. Allow configuration of IPC connect timeout. + (Robert Parker via suresh) + + HADOOP-9216. CompressionCodecFactory#getCodecClasses should trim the + result of parsing by Configuration. (Tsuyoshi Ozawa via todd) + + HADOOP-9231. Parametrize staging URL for the uniformity of + distributionManagement. (Konstantin Boudnik via suresh) + + HADOOP-9276. Allow BoundedByteArrayOutputStream to be resettable. + (Arun Murthy via hitesh) + + HADOOP-7688. Add servlet handler check in HttpServer.start(). + (Uma Maheswara Rao G via szetszwo) + + HADOOP-7886. Add toString to FileStatus. (SreeHari via jghoman) + + OPTIMIZATIONS + + HADOOP-8866. SampleQuantiles#query is O(N^2) instead of O(N). (Andrew Wang + via atm) + + HADOOP-8926. hadoop.util.PureJavaCrc32 cache hit-ratio is low for static + data (Gopal V via bobby) + + BUG FIXES + + HADOOP-9041. FsUrlStreamHandlerFactory could cause an infinite loop in + FileSystem initialization. (Yanbo Liang and Radim Kolar via llu) + + HADOOP-8418. Update UGI Principal classes name for running with + IBM JDK on 64 bits Windows. (Yu Gao via eyang) + + HADOOP-8795. BASH tab completion doesn't look in PATH, assumes path to + executable is specified. (Sean Mackrory via atm) + + HADOOP-8780. Update DeprecatedProperties apt file. (Ahmed Radwan via + tomwhite) + + HADOOP-8833. fs -text should make sure to call inputstream.seek(0) + before using input stream. (tomwhite and harsh) + + HADOOP-8791. Fix rm command documentation to indicate it deletes + files and not directories. (Jing Zhao via suresh) + + HADOOP-8855. SSL-based image transfer does not work when Kerberos + is disabled. (todd via eli) + + HADOOP-8616. ViewFS configuration requires a trailing slash. (Sandy Ryza + via atm) + + HADOOP-8756. Fix SEGV when libsnappy is in java.library.path but + not LD_LIBRARY_PATH. (Colin Patrick McCabe via eli) + + HADOOP-8881. FileBasedKeyStoresFactory initialization logging should + be debug not info. (tucu) + + HADOOP-8913. hadoop-metrics2.properties should give units in comment + for sampling period. (Sandy Ryza via suresh) + + HADOOP-8878. Uppercase namenode hostname causes hadoop dfs calls with + webhdfs filesystem and fsck to fail when security is on.
+ (Arpit Gupta via suresh) + + HADOOP-8901. GZip and Snappy support may not work without unversioned + libraries (Colin Patrick McCabe via todd) + + HADOOP-8883. Anonymous fallback in KerberosAuthenticator is broken. + (rkanter via tucu) + + HADOOP-8900. BuiltInGzipDecompressor throws IOException - stored gzip size + doesn't match decompressed size. (Andy Isaacson via suresh) + + HADOOP-8948. TestFileUtil.testGetDU fails on Windows due to incorrect + assumption of line separator. (Chris Nauroth via suresh) + + HADOOP-8951. RunJar to fail with user-comprehensible error + message if jar missing. (stevel via suresh) + + HADOOP-8713. TestRPCCompatibility fails intermittently with JDK7 + (Trevor Robinson via tgraves) + + HADOOP-9012. IPC Client sends wrong connection context (daryn via bobby) + + HADOOP-7115. Add a cache for getpwuid_r and getpwgid_r calls (tucu) + + HADOOP-6607. Add different variants of non caching HTTP headers. (tucu) + + HADOOP-9049. DelegationTokenRenewer needs to be Singleton and FileSystems + should register/deregister to/from. (Karthik Kambatla via tomwhite) + + HADOOP-9064. Augment DelegationTokenRenewer API to cancel the tokens on + calls to removeRenewAction. (kkambatl via tucu) + + HADOOP-9103. UTF8 class does not properly decode Unicode characters + outside the basic multilingual plane. (todd) + + HADOOP-9070. Kerberos SASL server cannot find kerberos key. (daryn via atm) + + HADOOP-6762. Exception while doing RPC I/O closes channel + (Sam Rash and todd via todd) + + HADOOP-9126. FormatZK and ZKFC startup can fail due to zkclient connection + establishment delay. (Rakesh R and todd via todd) + + HADOOP-9113. o.a.h.fs.TestDelegationTokenRenewer is failing intermittently. + (Karthik Kambatla via eli) + + HADOOP-9135. JniBasedUnixGroupsMappingWithFallback should log at debug + rather than info during fallback. (Colin Patrick McCabe via todd) + + HADOOP-9152. HDFS can report negative DFS Used on clusters with very small + amounts of data. (Brock Noland via atm) + + HADOOP-9153. Support createNonRecursive in ViewFileSystem. + (Sandy Ryza via tomwhite) + + HADOOP-9181. Set daemon flag for HttpServer's QueuedThreadPool. + (Liang Xie via suresh) + + HADOOP-9155. FsPermission should have different default value, 777 for + directory and 666 for file. (Binglin Chang via atm) + + HADOOP-9183. Potential deadlock in ActiveStandbyElector. (tomwhite) + + HADOOP-9203. RPCCallBenchmark should find a random available port. + (Andrew Purtell via suresh) + + HADOOP-9178. src/main/conf is missing hadoop-policy.xml. + (Sandy Ryza via eli) + + HADOOP-8816. HTTP Error 413 full HEAD if using kerberos authentication. + (moritzmoeller via tucu) + + HADOOP-9212. Potential deadlock in FileSystem.Cache/IPC/UGI. (tomwhite) + + HADOOP-8589 ViewFs tests fail when tests and home dirs are nested. + (sanjay Radia) + + HADOOP-9193. hadoop script can inadvertently expand wildcard arguments + when delegating to hdfs script. (Andy Isaacson via todd) + + HADOOP-9215. when using cmake-2.6, libhadoop.so doesn't get created + (only libhadoop.so.1.0.0) (Colin Patrick McCabe via todd) + + HADOOP-8857. hadoop.http.authentication.signature.secret.file docs + should not state that secret is randomly generated. (tucu) + + HADOOP-9190. packaging docs is broken. (Andy Isaacson via tgraves) + + HADOOP-9221. Convert remaining xdocs to APT. (Andy Isaacson via atm) + + HADOOP-8981. TestMetricsSystemImpl fails on Windows. (Xuan Gong via suresh) + + HADOOP-9124. 
SortedMapWritable violates contract of Map interface for + equals() and hashCode(). (Surenkumar Nihalani via tomwhite) + + HADOOP-9278. Fix the file handle leak in HarMetaData.parseMetaData() in + HarFileSystem. (Chris Nauroth via szetszwo) + + HADOOP-9252. In StringUtils, humanReadableInt(..) has a race condition and + the synchronization of limitDecimalTo2(double) can be avoided. (szetszwo) + + HADOOP-9260. Hadoop version may be not correct when starting name node or + data node. (Chris Nauroth via jlowe) + + HADOOP-9289. FsShell rm -f fails for non-matching globs. (Daryn Sharp via + suresh) + +Release 2.0.2-alpha - 2012-09-07 + + INCOMPATIBLE CHANGES + + HADOOP-8388. Remove unused BlockLocation serialization. + (Colin Patrick McCabe via eli) + + HADOOP-8689. Make trash a server side configuration option. (eli) + + HADOOP-8710. Remove ability for users to easily run the trash emptier. (eli) + + HADOOP-8794. Rename YARN_HOME to HADOOP_YARN_HOME. (vinodkv via acmurthy) + + NEW FEATURES + + HDFS-3042. Automatic failover support for NameNode HA (todd) + (see dedicated section below for breakdown of subtasks) + + HADOOP-8135. Add ByteBufferReadable interface to FSDataInputStream. (Henry + Robinson via atm) + + HADOOP-8458. Add management hook to AuthenticationHandler to enable + delegation token operations support (tucu) + + HADOOP-8465. hadoop-auth should support ephemeral authentication (tucu) + + HADOOP-8644. AuthenticatedURL should be able to use SSLFactory. (tucu) + + HADOOP-8581. add support for HTTPS to the web UIs. (tucu) + + HADOOP-7754. Expose file descriptors from Hadoop-wrapped local + FileSystems (todd and ahmed via tucu) + + HADOOP-8240. Add a new API to allow users to specify a checksum type + on FileSystem.create(..). (Kihwal Lee via szetszwo) + + IMPROVEMENTS + + HADOOP-8340. SNAPSHOT build versions should compare as less than their eventual + final release. (todd) + + HADOOP-8361. Avoid out-of-memory problems when deserializing strings. + (Colin Patrick McCabe via eli) + + HADOOP-8224. Don't hardcode hdfs.audit.logger in the scripts. + (Tomohiko Kinebuchi via eli) + + HADOOP-8398. Cleanup BlockLocation. (eli) + + HADOOP-8422. Deprecate FileSystem#getDefault* and getServerDefault + methods that don't take a Path argument. (eli) + + HADOOP-8323. Add javadoc and tests for Text.clear() behavior (harsh) + + HADOOP-8358. Config-related WARN for dfs.web.ugi can be avoided. (harsh) + + HADOOP-8450. Remove src/test/system. (eli) + + HADOOP-8244. Improve comments on ByteBufferReadable.read. (Henry Robinson + via atm) + + HADOOP-8368. Use CMake rather than autotools to build native code (cmccabe via tucu) + + HADOOP-8524. Allow users to get source of a Configuration + parameter (harsh) + + HADOOP-8449. hadoop fs -text fails with compressed sequence files + with the codec file extension (harsh) + + HADOOP-6802. Remove FS_CLIENT_BUFFER_DIR_KEY = "fs.client.buffer.dir" + from CommonConfigurationKeys.java (not used, deprecated) + (Sho Shimauchi via harsh) + + HADOOP-3450. Add tests to Local Directory Allocator for + asserting their URI-returning capability (Sho Shimauchi via harsh) + + HADOOP-8463. hadoop.security.auth_to_local needs a key definition and doc. + (Madhukara Phatak via eli) + + HADOOP-8533. Remove parallel call unused capability in RPC. + (Brandon Li via suresh) + + HADOOP-8423. MapFile.Reader.get() crashes jvm or throws + EOFException on Snappy or LZO block-compressed data + (todd via harsh) + + HADOOP-8541. Better high-percentile latency metrics.
(Andrew Wang via atm) + + HADOOP-8362. Improve exception message when Configuration.set() is + called with a null key or value. (Madhukara Phatak + and Suresh Srinivas via harsh) + + HADOOP-7818. DiskChecker#checkDir should fail if the directory is + not executable. (Madhukara Phatak via harsh) + + HADOOP-8531. SequenceFile Writer can throw out a better error if a + serializer or deserializer isn't available + (Madhukara Phatak via harsh) + + HADOOP-8609. IPC server logs a useless message when shutting down socket. + (Jon Zuanich via atm) + + HADOOP-8620. Add -Drequire.fuse and -Drequire.snappy. (Colin + Patrick McCabe via eli) + + HADOOP-8687. Upgrade log4j to 1.2.17. (eli) + + HADOOP-8278. Make sure components declare correct set of dependencies. + (tomwhite) + + HADOOP-8700. Use enum to define the checksum constants in DataChecksum. + (szetszwo) + + HADOOP-8686. Fix warnings in native code. (Colin Patrick McCabe via eli) + + HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file + checksum with CRC32C. (Kihwal Lee via szetszwo) + + HADOOP-8619. WritableComparator must implement no-arg constructor. + (Chris Douglas via Suresh) + + HADOOP-8075. Lower native-hadoop library log from info to debug. + (Hızır Sefa İrken via eli) + + HADOOP-8748. Refactor DFSClient retry utility methods to a new class + in org.apache.hadoop.io.retry. (Arun C Murthy via szetszwo) + + HADOOP-8754. Deprecate all the RPC.getServer() variants. (Brandon Li + via szetszwo) + + HADOOP-8801. ExitUtil#terminate should capture the exception stack trace. (eli) + + HADOOP-8819. Incorrectly & is used instead of && in some file system + implementations. (Brandon Li via suresh) + + HADOOP-7808. Port HADOOP-7510 - Add configurable option to use original + hostname in token instead of IP to allow server IP change. + (Daryn Sharp via suresh) + + BUG FIXES + + HADOOP-8372. NetUtils.normalizeHostName() incorrectly handles hostname + starting with a numeric character. (Junping Du via suresh) + + HADOOP-8393. hadoop-config.sh missing variable exports, causes Yarn jobs + to fail with ClassNotFoundException MRAppMaster. (phunt via tucu) + + HADOOP-8316. Audit logging should be disabled by default. (eli) + + HADOOP-8400. All commands warn "Kerberos krb5 configuration not found" + when security is not enabled. (tucu) + + HADOOP-8406. CompressionCodecFactory.CODEC_PROVIDERS iteration is + thread-unsafe (todd) + + HADOOP-8287. etc/hadoop is missing hadoop-env.sh (eli) + + HADOOP-8408. MR doesn't work with a non-default ViewFS mount table + and security enabled. (atm via eli) + + HADOOP-8329. Build fails with Java 7. (eli) + + HADOOP-8268. A few pom.xml across Hadoop project + may fail XML validation. (Radim Kolar via harsh) + + HADOOP-8444. Fix the tests FSMainOperationsBaseTest.java and + FileContextMainOperationsBaseTest.java to avoid potential + test failure (Madhukara Phatak via harsh) + + HADOOP-8452. DN logs backtrace when running under jsvc and /jmx is loaded + (Andy Isaacson via bobby) + + HADOOP-8460. Document proper setting of HADOOP_PID_DIR and + HADOOP_SECURE_DN_PID_DIR (bobby) + + HADOOP-8466. hadoop-client POM incorrectly excludes avro. (bmahe via tucu) + + HADOOP-8481. update BUILDING.txt to talk about cmake rather than autotools. + (Colin Patrick McCabe via eli) + + HADOOP-8485. Don't hardcode "Apache Hadoop 0.23" in the docs. (eli) + + HADOOP-8488. test-patch.sh gives +1 even if the native build fails. + (Colin Patrick McCabe via eli) + + HADOOP-8507. Avoid OOM while deserializing DelegationTokenIdentifer. 
+ (Colin Patrick McCabe via eli) + + HADOOP-8433. Don't set HADOOP_LOG_DIR in hadoop-env.sh. + (Brahma Reddy Battula via eli) + + HADOOP-8509. JarFinder duplicate entry: META-INF/MANIFEST.MF exception (tucu) + + HADOOP-8512. AuthenticatedURL should reset the Token when the server returns + other than OK on authentication (tucu) + + HADOOP-8168. empty-string owners or groups causes {{MissingFormatWidthException}} + in o.a.h.fs.shell.Ls.ProcessPath() (ekoontz via tucu) + + HADOOP-8438. hadoop-validate-setup.sh refers to examples jar file which doesn't exist + (Devaraj K via umamahesh) + + HADOOP-8538. CMake builds fail on ARM. (Trevor Robinson via eli) + + HADOOP-8547. Package hadoop-pipes examples/bin directory (again). + (Colin Patrick McCabe via eli) + + HADOOP-8563. don't package hadoop-pipes examples/bin + (Colin Patrick McCabe via tgraves) + + HADOOP-8566. AvroReflectSerializer.accept(Class) throws a NPE if the class has no + package (primitive types and arrays). (tucu) + + HADOOP-8586. Fixup a bunch of SPNEGO misspellings. (eli) + + HADOOP-3886. Error in javadoc of Reporter, Mapper and Progressable + (Jingguo Yao via harsh) + + HADOOP-8587. HarFileSystem access of harMetaCache isn't threadsafe. (eli) + + HADOOP-8585. Fix initialization circularity between UserGroupInformation + and HadoopConfiguration. (Colin Patrick McCabe via atm) + + HADOOP-8552. Conflict: Same security.log.file for multiple users. + (kkambatl via tucu) + + HADOOP-8537. Fix TFile tests to pass even when native zlib support is not + compiled. (todd) + + HADOOP-8626. Typo in default setting for + hadoop.security.group.mapping.ldap.search.filter.user. (Jonathan Natkins + via atm) + + HADOOP-8480. The native build should honor -DskipTests. + (Colin Patrick McCabe via eli) + + HADOOP-8659. Native libraries must build with soft-float ABI for Oracle JVM + on ARM. (Trevor Robinson via todd) + + HADOOP-8654. TextInputFormat delimiter bug (Gelesh and Jason Lowe via + bobby) + + HADOOP-8614. IOUtils#skipFully hangs forever on EOF. + (Colin Patrick McCabe via eli) + + HADOOP-8720. TestLocalFileSystem should use test root subdirectory. + (Vlad Rozov via eli) + + HADOOP-8721. ZKFC should not retry 45 times when attempting a graceful + fence during a failover. (Vinayakumar B via atm) + + HADOOP-8632. Configuration leaking class-loaders (Costin Leau via bobby) + + HADOOP-4572. Can not access user logs - Jetty is not configured by default + to serve aliases/symlinks (ahmed via tucu) + + HADOOP-8660. TestPseudoAuthenticator failing with NPE. (tucu) + + HADOOP-8699. some common testcases create core-site.xml in test-classes + making other testcases to fail. (tucu) + + HADOOP-8031. Configuration class fails to find embedded .jar resources; + should use URL.openStream() (genman via tucu) + + HADOOP-8737. cmake: always use JAVA_HOME to find libjvm.so, jni.h, jni_md.h. + (Colin Patrick McCabe via eli) + + HADOOP-8747. Syntax error on cmake version 2.6 patch 2 in JNIFlags.cmake. (cmccabe via tucu) + + HADOOP-8722. Update BUILDING.txt with latest snappy info. + (Colin Patrick McCabe via eli) + + HADOOP-8764. CMake: HADOOP-8737 broke ARM build. (Trevor Robinson via eli) + + HADOOP-8770. NN should not RPC to self to find trash defaults. (eli) + + HADOOP-8648. libhadoop: native CRC32 validation crashes when + io.bytes.per.checksum=1. (Colin Patrick McCabe via eli) + + HADOOP-8766. FileContextMainOperationsBaseTest should randomize the root + dir. (Colin Patrick McCabe via atm) + + HADOOP-8749. 
HADOOP-8031 changed the way in which relative xincludes are handled in + Configuration. (ahmed via tucu) + + HADOOP-8431. Running distcp wo args throws IllegalArgumentException. + (Sandy Ryza via eli) + + HADOOP-8775. MR2 distcp permits non-positive value to -bandwidth option + which causes job never to complete. (Sandy Ryza via atm) + + HADOOP-8781. hadoop-config.sh should add JAVA_LIBRARY_PATH to LD_LIBRARY_PATH. (tucu) + + BREAKDOWN OF HDFS-3042 SUBTASKS + + HADOOP-8220. ZKFailoverController doesn't handle failure to become active + correctly (todd) + + HADOOP-8228. Auto HA: Refactor tests and add stress tests. (todd) + + HADOOP-8215. Security support for ZK Failover controller (todd) + + HADOOP-8245. Fix flakiness in TestZKFailoverController (todd) + + HADOOP-8257. TestZKFailoverControllerStress occasionally fails with Mockito + error (todd) + + HADOOP-8260. Replace ClientBaseWithFixes with our own modified copy of the + class (todd) + + HADOOP-8246. Auto-HA: automatically scope znode by nameservice ID (todd) + + HADOOP-8247. Add a config to enable auto-HA, which disables manual + FailoverController (todd) + + HADOOP-8306. ZKFC: improve error message when ZK is not running. (todd) + + HADOOP-8279. Allow manual failover to be invoked when auto-failover is + enabled. (todd) + + HADOOP-8276. Auto-HA: add config for java options to pass to zkfc daemon + (todd via eli) + + HADOOP-8405. ZKFC tests leak ZK instances. (todd) + +Release 2.0.0-alpha - 05-23-2012 + + INCOMPATIBLE CHANGES + + HADOOP-7920. Remove Avro Rpc. (suresh) + + NEW FEATURES + + HADOOP-7773. Add support for protocol buffer based RPC engine. + (suresh) + + HADOOP-7875. Add helper class to unwrap protobuf ServiceException. + (suresh) + + HADOOP-7454. Common side of High Availability Framework (HDFS-1623) + Contributed by Todd Lipcon, Aaron T. Myers, Eli Collins, Uma Maheswara Rao G, + Bikas Saha, Suresh Srinivas, Jitendra Nath Pandey, Hari Mankude, Brandon Li, + Sanjay Radia, Mingjie Lai, and Gregory Chanan + + HADOOP-8121. Active Directory Group Mapping Service. (Jonathan Natkins via + atm) + + HADOOP-7030. Add TableMapping topology implementation to read host to rack + mapping from a file. (Patrick Angeles and tomwhite via tomwhite) + + HADOOP-8206. Common portion of a ZK-based failover controller (todd) + + HADOOP-8210. Common side of HDFS-3148: The client should be able + to use multiple local interfaces for data transfer. (eli) + + HADOOP-8343. Allow configuration of authorization for JmxJsonServlet and + MetricsServlet (tucu) + + IMPROVEMENTS + + HADOOP-7524. Change RPC to allow multiple protocols including multuple + versions of the same protocol (sanjay Radia) + + HADOOP-7607. Simplify the RPC proxy cleanup process. (atm) + + HADOOP-7687. Make getProtocolSignature public (sanjay) + + HADOOP-7693. Enhance AvroRpcEngine to support the new #addProtocol + interface introduced in HADOOP-7524. (cutting) + + HADOOP-7716. RPC protocol registration on SS does not log the protocol name + (only the class which may be different) (sanjay) + + HADOOP-7776. Make the Ipc-Header in a RPC-Payload an explicit header. + (sanjay) + + HADOOP-7862. Move the support for multiple protocols to lower layer so + that Writable, PB and Avro can all use it (Sanjay) + + HADOOP-7876. Provided access to encoded key in DelegationKey for + use in protobuf based RPCs. (suresh) + + HADOOP-7899. Generate proto java files as part of the build. (tucu) + + HADOOP-7957. Classes deriving GetGroupsBase should be able to override + proxy creation. 
(jitendra) + + HADOOP-7965. Support for protocol version and signature in PB. (jitendra) + + HADOOP-8070. Add a standalone benchmark for RPC call performance. (todd) + + HADOOP-8084. Updates ProtoBufRpc engine to not do an unnecessary copy + for RPC request/response. (ddas) + + HADOOP-8085. Add RPC metrics to ProtobufRpcEngine. (Hari Mankude via + suresh) + + HADOOP-8098. KerberosAuthenticatorHandler should use _HOST replacement to + resolve principal name (tucu) + + HADOOP-8118. In metrics2.util.MBeans, change log level to trace for the + stack trace of InstanceAlreadyExistsException. (szetszwo) + + HADOOP-8125. make hadoop-client set of curated jars available in a + distribution tarball (rvs via tucu) + + HADOOP-7717. Move handling of concurrent client fail-overs to + RetryInvocationHandler (atm) + + HADOOP-7728. Enable task memory management to be configurable in hadoop + config setup script. (ramya) + + HADOOP-7358. Improve log levels when exceptions caught in RPC handler + (Todd Lipcon via shv) + + HADOOP-7557 Make IPC header be extensible (sanjay radia) + + HADOOP-7806. Support binding to sub-interfaces (eli) + + HADOOP-6941. Adds support for building Hadoop with IBM's JDK + (Stephen Watt, Eli and ddas) + + HADOOP-8183. Stop using "mapred.used.genericoptions.parser" (harsh) + + HADOOP-6924. Adds a directory to the list of directories to search + for the libjvm.so file. The new directory is found by running a 'find' + command and the first output is taken. This was done to handle the + build of Hadoop with IBM's JDK. (Stephen Watt, Guillermo Cabrera and ddas) + + HADOOP-8200. Remove HADOOP_[JOBTRACKER|TASKTRACKER]_OPTS. (eli) + + HADOOP-8184. ProtoBuf RPC engine uses the IPC layer reply packet. + (Sanjay Radia via szetszwo) + + HADOOP-8163. Improve ActiveStandbyElector to provide hooks for + fencing old active. (todd) + + HADOOP-8193. Refactor FailoverController/HAAdmin code to add an abstract + class for "target" services. (todd) + + HADOOP-8212. Improve ActiveStandbyElector's behavior when session expires + (todd) + + HADOOP-8216. Address log4j.properties inconsistencies btw main and + template dirs. (Patrick Hunt via eli) + + HADOOP-8149. Cap space usage of default log4j rolling policy. + (Patrick Hunt via eli) + + HADOOP-8211. Update commons-net version to 3.1. (eli) + + HADOOP-8236. haadmin should have configurable timeouts for failover + commands. (todd) + + HADOOP-8242. AbstractDelegationTokenIdentifier: add getter methods + for owner and realuser. (Colin Patrick McCabe via eli) + + HADOOP-8007. Use substitution tokens for fencing argument (todd) + + HADOOP-8077. HA: fencing method should be able to be configured on + a per-NN or per-NS basis (todd) + + HADOOP-8086. KerberosName silently sets defaultRealm to "" if the + Kerberos config is not found, it should log a WARN (tucu) + + HADOOP-8280. Move VersionUtil/TestVersionUtil and GenericTestUtils from + HDFS into Common. (Ahmed Radwan via atm) + + HADOOP-8117. Upgrade test build to Surefire 2.12 (todd) + + HADOOP-8152. Expand public APIs for security library classes. (atm via eli) + + HADOOP-7549. Use JDK ServiceLoader mechanism to find FileSystem implementations. (tucu) + + HADOOP-8185. Update namenode -format documentation and add -nonInteractive + and -force. (Arpit Gupta via atm) + + HADOOP-8214. make hadoop script recognize a full set of deprecated commands (rvs via tucu) + + HADOOP-8347. Hadoop Common logs misspell 'successful'. + (Philip Zeyliger via eli) + + HADOOP-8350. 
Improve NetUtils.getInputStream to return a stream which has + a tunable timeout. (todd) + + HADOOP-8356. FileSystem service loading mechanism should print the FileSystem + impl it is failing to load (tucu) + + HADOOP-8353. hadoop-daemon.sh and yarn-daemon.sh can be misleading on stop. + (Roman Shaposhnik via atm) + + HADOOP-8113. Correction to BUILDING.txt: HDFS needs ProtocolBuffer, too + (not just MapReduce). Contributed by Eugene Koontz. + + HADOOP-8285 Use ProtoBuf for RpcPayLoadHeader (sanjay radia) + + HADOOP-8366 Use ProtoBuf for RpcResponseHeader (sanjay radia) + + HADOOP-7729. Send back valid HTTP response if user hits IPC port with + HTTP GET. (todd) + + HADOOP-7987. Support setting the run-as user in unsecure mode. (jitendra) + + HADOOP-7994. Remove getProtocolVersion and getProtocolSignature from the + client side translator and server side implementation. (jitendra) + + HADOOP-8367 Improve documentation of declaringClassProtocolName in + rpc headers. (Sanjay Radia) + + HADOOP-8624. ProtobufRpcEngine should log all RPCs if TRACE logging is + enabled (todd) + + OPTIMIZATIONS + + BUG FIXES + + HADOOP-8199. Fix issues in start-all.sh and stop-all.sh (Devaraj K via umamahesh) + + HADOOP-7635. RetryInvocationHandler should release underlying resources on + close. (atm) + + HADOOP-7695. RPC.stopProxy can throw unintended exception while logging + error. (atm) + + HADOOP-7833. Fix findbugs warnings in protobuf generated code. + (John Lee via suresh) + + HADOOP-7897. ProtobufRpcEngine client side exception mechanism is not + consistent with WritableRpcEngine. (suresh) + + HADOOP-7913. Fix bug in ProtoBufRpcEngine. (sanjay) + + HADOOP-7892. IPC logs too verbose after "RpcKind" introduction. (todd) + + HADOOP-7968. Errant println left in RPC.getHighestSupportedProtocol. (Sho + Shimauchi via harsh) + + HADOOP-7931. o.a.h.ipc.WritableRpcEngine should have a way to force + initialization. (atm) + + HADOOP-8104. Inconsistent Jackson versions (tucu) + + HADOOP-8119. Fix javac warnings in TestAuthenticationFilter in hadoop-auth. + (szetszwo) + + HADOOP-7888. TestFailoverProxy fails intermittently on trunk. (Jason Lowe + via atm) + + HADOOP-8154. DNS#getIPs shouldn't silently return the local host + IP for bogus interface names. (eli) + + HADOOP-8169. javadoc generation fails with java.lang.OutOfMemoryError: + Java heap space (tgraves via bobby) + + HADOOP-8167. Configuration deprecation logic breaks backwards compatibility (tucu) + + HADOOP-8189. LdapGroupsMapping shouldn't throw away IOException. (Jonathan Natkins via atm) + + HADOOP-8191. SshFenceByTcpPort uses netcat incorrectly (todd) + + HADOOP-8157. Fix race condition in Configuration that could cause spurious + ClassNotFoundExceptions after a GC. (todd) + + HADOOP-8197. Configuration logs WARNs on every use of a deprecated key (tucu) + + HADOOP-8159. NetworkTopology: getLeaf should check for invalid topologies. + (Colin Patrick McCabe via eli) + + HADOOP-8204. TestHealthMonitor fails occasionally (todd) + + HADOOP-8202. RPC stopProxy() does not close the proxy correctly. + (Hari Mankude via suresh) + + HADOOP-8218. RPC.closeProxy shouldn't throw error when closing a mock + (todd) + + HADOOP-8238. NetUtils#getHostNameOfIP blows up if given ip:port + string w/o port. (eli) + + HADOOP-8243. Security support broken in CLI (manual) failover controller + (todd) + + HADOOP-8251. Fix SecurityUtil.fetchServiceTicket after HADOOP-6941 (todd) + + HADOOP-8249. 
invalid hadoop-auth cookies should trigger authentication + if info is avail before returning HTTP 401 (tucu) + + HADOOP-8261. Har file system doesn't deal with FS URIs with a host but no + port. (atm) + + HADOOP-8263. Stringification of IPC calls not useful (todd) + + HADOOP-8264. Remove irritating double double quotes in front of hostname + (Bernd Fondermann via bobby) + + HADOOP-8270. hadoop-daemon.sh stop action should return 0 for an + already stopped service. (Roman Shaposhnik via eli) + + HADOOP-8144. pseudoSortByDistance in NetworkTopology doesn't work + properly if no local node and first node is local rack node. + (Junping Du) + + HADOOP-8282. start-all.sh refers incorrectly start-dfs.sh + existence for starting start-yarn.sh. (Devaraj K via eli) + + HADOOP-7350. Use ServiceLoader to discover compression codec classes. + (tomwhite) + + HADOOP-8284. clover integration broken, also mapreduce poms are pulling + in clover as a dependency. (phunt via tucu) + + HADOOP-8309. Pseudo & Kerberos AuthenticationHandler should use + getType() to create token (tucu) + + HADOOP-8314. HttpServer#hasAdminAccess should return false if + authorization is enabled but user is not authenticated. (tucu) + + HADOOP-8296. hadoop/yarn daemonlog usage wrong (Devaraj K via tgraves) + + HADOOP-8310. FileContext#checkPath should handle URIs with no port. (atm) + + HADOOP-8321. TestUrlStreamHandler fails. (tucu) + + HADOOP-8325. Add a ShutdownHookManager to be used by different + components instead of the JVM shutdownhook (tucu) + + HADOOP-8275. Range check DelegationKey length. + (Colin Patrick McCabe via eli) + + HADOOP-8342. HDFS command fails with exception following merge of + HADOOP-8325 (tucu) + + HADOOP-8346. Makes oid changes to make SPNEGO work. Was broken due + to fixes introduced by the IBM JDK compatibility patch. (ddas) + + HADOOP-8355. SPNEGO filter throws/logs exception when authentication fails (tucu) + + HADOOP-8349. ViewFS doesn't work when the root of a file system is mounted. (atm) + + HADOOP-8328. Duplicate FileSystem Statistics object for 'file' scheme. + (tomwhite) + + HADOOP-8359. Fix javadoc warnings in Configuration. (Anupam Seth via + szetszwo) + + HADOOP-7988. Upper case in hostname part of the principals doesn't work with + kerberos. (jitendra) + + BREAKDOWN OF HADOOP-7454 SUBTASKS + + HADOOP-7455. HA: Introduce HA Service Protocol Interface. (suresh) + + HADOOP-7774. HA: Administrative CLI to control HA daemons. (todd) + + HADOOP-7896. HA: if both NNs are in Standby mode, client needs to try failing + back and forth several times with sleeps. (atm) + + HADOOP-7922. Improve some logging for client IPC failovers and + StandbyExceptions (todd) + + HADOOP-7921. StandbyException should extend IOException (todd) + + HADOOP-7928. HA: Client failover policy is incorrectly trying to fail over all + IOExceptions (atm) + + HADOOP-7925. Add interface and update CLI to query current state to + HAServiceProtocol (eli via todd) + + HADOOP-7932. Make client connection retries on socket time outs configurable. + (Uma Maheswara Rao G via todd) + + HADOOP-7924. FailoverController for client-based configuration (eli) + + HADOOP-7961. Move HA fencing to common. (eli) + + HADOOP-7970. HAServiceProtocol methods must throw IOException. (Hari Mankude + via suresh). + + HADOOP-7992. Add ZKClient library to facilitate leader election. (Bikas Saha + via suresh). + + HADOOP-7983. HA: failover should be able to pass args to fencers. (eli) + + HADOOP-7938. 
HA: the FailoverController should optionally fence the active + during failover. (eli) + + HADOOP-7991. HA: the FailoverController should check the standby is ready + before failing over. (eli) + + HADOOP-8038. Add 'ipc.client.connect.max.retries.on.timeouts' entry in + core-default.xml file. (Uma Maheswara Rao G via atm) + + HADOOP-8041. Log a warning when a failover is first attempted (todd) + + HADOOP-8068. void methods can swallow exceptions when going through failover + path (todd) + + HADOOP-8116. RetriableCommand is using RetryPolicy incorrectly after + HADOOP-7896. (atm) + + HADOOP-8317. Update maven-assembly-plugin to 2.3 - fix build on FreeBSD + (Radim Kolar via bobby) + + HADOOP-8172. Configuration no longer sets all keys in a deprecated key + list. (Anupam Seth via bobby) + + HADOOP-7868. Hadoop native fails to compile when default linker + option is -Wl,--as-needed. (Trevor Robinson via eli) + + HADOOP-8655. Fix TextInputFormat for large deliminators. (Gelesh via + bobby) + + HADOOP-7900. LocalDirAllocator confChanged() accesses conf.get() twice + (Ravi Gummadi via Uma Maheswara Rao G) + + HADOOP-8146. FsShell commands cannot be interrupted + (Daryn Sharp via Uma Maheswara Rao G) + + HADOOP-8018. Hudson auto test for HDFS has started throwing javadoc + (Jon Eagles via bobby) + + HADOOP-8001 ChecksumFileSystem's rename doesn't correctly handle checksum + files. (Daryn Sharp via bobby) + + HADOOP-8006 TestFSInputChecker is failing in trunk. + (Daryn Sharp via bobby) + + HADOOP-7998. CheckFileSystem does not correctly honor setVerifyChecksum + (Daryn Sharp via bobby) + + HADOOP-7606. Upgrade Jackson to version 1.7.1 to match the version required + by Jersey (Alejandro Abdelnur via atm) + +Release 0.23.9 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 0.23.8 - 2013-06-05 + + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + HADOOP-9222. Cover package with org.apache.hadoop.io.lz4 unit tests (Vadim + Bondarev via jlowe) + + HADOOP-9233. Cover package org.apache.hadoop.io.compress.zlib with unit + tests (Vadim Bondarev via jlowe) + + HADOOP-9469. mapreduce/yarn source jars not included in dist tarball + (Robert Parker via tgraves) + + HADOOP-9504. MetricsDynamicMBeanBase has concurrency issues in + createMBeanInfo (Liang Xie via jlowe) + + HADOOP-9614. smart-test-patch.sh hangs for new version of patch (2.7.1) + (Ravi Prakash via jeagles) + +Release 0.23.7 - 2013-04-18 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + HADOOP-8849. FileUtil#fullyDelete should grant the target directories +rwx + permissions (Ivan A. Veselovsky via bobby) + + HADOOP-9067. provide test for LocalFileSystem.reportChecksumFailure + (Ivan A. Veselovsky via bobby) + + HADOOP-9336. Allow UGI of current connection to be queried. (Daryn Sharp + via kihwal) + + HADOOP-9352. Expose UGI.setLoginUser for tests (daryn) + + HADOOP-9209. Add shell command to dump file checksums (Todd Lipcon via + jeagles) + + HADOOP-9374. Add tokens from -tokenCacheFile into UGI (daryn) + + HADOOP-8711. IPC Server supports adding exceptions for which + the message is printed and the stack trace is not printed to avoid chatter. + (Brandon Li via Suresh) + + + OPTIMIZATIONS + + HADOOP-8462. Native-code implementation of bzip2 codec. (Govind Kamat via + jlowe) + + BUG FIXES + + HADOOP-9302. HDFS docs not linked from top level (Andy Isaacson via + tgraves) + + HADOOP-9303. 
command manual dfsadmin missing entry for restoreFailedStorage + option (Andy Isaacson via tgraves) + + HADOOP-9339. IPC.Server incorrectly sets UGI auth type (Daryn Sharp via + kihwal) + +Release 0.23.6 - 2013-02-06 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + HADOOP-9217. Print thread dumps when hadoop-common tests fail. + (Andrey Klochkov via suresh) + + HADOOP-9247. Parametrize Clover "generateXxx" properties to make them + re-definable via -D in mvn calls. (Ivan A. Veselovsky via suresh) + + + OPTIMIZATIONS + + BUG FIXES + + HADOOP-9072. Hadoop-Common-0.23-Build Fails to build in Jenkins + (Robert Parker via tgraves) + + HADOOP-8992. Enhance unit-test coverage of class HarFileSystem (Ivan A. + Veselovsky via bobby) + + HADOOP-9038. unit-tests for AllocatorPerContext.PathIterator (Ivan A. + Veselovsky via bobby) + + HADOOP-9105. FsShell -moveFromLocal erroneously fails (daryn via bobby) + + HADOOP-9097. Maven RAT plugin is not checking all source files (tgraves) + + HADOOP-9255. relnotes.py missing last jira (tgraves) + +Release 0.23.5 - 2012-11-28 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + HADOOP-8932. JNI-based user-group mapping modules can be too chatty on + lookup failures. (Kihwal Lee via suresh) + + HADOOP-8930. Cumulative code coverage calculation (Andrey Klochkov via + bobby) + + OPTIMIZATIONS + + BUG FIXES + + HADOOP-8906. paths with multiple globs are unreliable. (Daryn Sharp via + jlowe) + + HADOOP-8811. Compile hadoop native library in FreeBSD (Radim Kolar via + bobby) + + HADOOP-8962. RawLocalFileSystem.listStatus fails when a child filename + contains a colon (jlowe via bobby) + + HADOOP-8986. Server$Call object is never released after it is sent (bobby) + + HADOOP-9022. Hadoop distcp tool fails to copy file if -m 0 specified + (Jonathan Eagles via bobby) + + HADOOP-9025. org.apache.hadoop.tools.TestCopyListing failing (Jonathan + Eagles via jlowe) + +Release 0.23.4 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + HADOOP-8822. relnotes.py was deleted post mavenization (bobby) + + OPTIMIZATIONS + + BUG FIXES + + HADOOP-8843. Old trash directories are never deleted on upgrade + from 1.x (jlowe) + + HADOOP-8684. Deadlock between WritableComparator and WritableComparable. + (Jing Zhao via suresh) + +Release 0.23.3 + + INCOMPATIBLE CHANGES + + HADOOP-7967. Need generalized multi-token filesystem support (daryn) + + NEW FEATURES + + IMPROVEMENTS + + HADOOP-8108. Move method getHostPortString() from NameNode to NetUtils. + (Brandon Li via jitendra) + + HADOOP-8288. Remove references of mapred.child.ulimit etc. since they are + not being used any more (Ravi Prakash via bobby) + + HADOOP-8535. Cut hadoop build times in half (Jonathan Eagles via bobby) + + HADOOP-8525. Provide Improved Traceability for Configuration (bobby) + + OPTIMIZATIONS + + BUG FIXES + + HADOOP-8088. User-group mapping cache incorrectly does negative caching on + transient failures (Kihwal Lee via bobby) + + HADOOP-8179. risk of NPE in CopyCommands processArguments() (Daryn Sharp + via bobby) + + HADOOP-6963. In FileUtil.getDU(..), neither include the size of directories + nor follow symbolic links. (Ravi Prakash via szetszwo) + + HADOOP-8180. Remove hsqldb since its not needed from pom.xml (Ravi Prakash + via tgraves) + + HADOOP-8014. ViewFileSystem does not correctly implement getDefaultBlockSize, + getDefaultReplication, getContentSummary (John George via bobby) + + HADOOP-7510.
Tokens should use original hostname provided instead of ip + (Daryn Sharp via bobby) + + HADOOP-8283. Allow tests to control token service value (Daryn Sharp via + bobby) + + HADOOP-8286. Simplify getting a socket address from conf (Daryn Sharp via + bobby) + + HADOOP-8227. Allow RPC to limit ephemeral port range. (bobby) + + HADOOP-8305. distcp over viewfs is broken (John George via bobby) + + HADOOP-8334. HttpServer sometimes returns incorrect port (Daryn Sharp via + bobby) + + HADOOP-8330. Update TestSequenceFile.testCreateUsesFsArg() for HADOOP-8305. + (John George via szetszwo) + + HADOOP-8335. Improve Configuration's address handling (Daryn Sharp via + bobby) + + HADOOP-8327. distcpv2 and distcpv1 jars should not coexist (Dave Thompson + via bobby) + + HADOOP-8341. Fix or filter findbugs issues in hadoop-tools (bobby) + + HADOOP-8373. Port RPC.getServerAddress to 0.23 (Daryn Sharp via bobby) + + HADOOP-8495. Update Netty to avoid leaking file descriptors during shuffle + (Jason Lowe via tgraves) + + HADOOP-8129. ViewFileSystemTestSetup setupForViewFileSystem is erring + (Ahmed Radwan and Ravi Prakash via bobby) + + HADOOP-8573. Configuration tries to read from an inputstream resource + multiple times (Robert Evans via tgraves) + + HADOOP-8599. Non empty response from FileSystem.getFileBlockLocations when + asking for data beyond the end of file. (Andrey Klochkov via todd) + + HADOOP-8606. FileSystem.get may return the wrong filesystem (Daryn Sharp + via bobby) + + HADOOP-8551. fs -mkdir creates parent directories without the -p option + (John George via bobby) + + HADOOP-8613. AbstractDelegationTokenIdentifier#getUser() should set token + auth type. (daryn) + + HADOOP-8627. FS deleteOnExit may delete the wrong path (daryn via bobby) + + HADOOP-8634. Ensure FileSystem#close doesn't squawk for deleteOnExit paths + (daryn via bobby) + + HADOOP-8550. hadoop fs -touchz automatically created parent directories + (John George via bobby) + + HADOOP-8635. Cannot cancel paths registered deleteOnExit (daryn via bobby) + + HADOOP-8637. FilterFileSystem#setWriteChecksum is broken (daryn via bobby) + + HADOOP-8370. Native build failure: javah: class file for + org.apache.hadoop.classification.InterfaceAudience not found (Trevor + Robinson via tgraves) + + HADOOP-8633. Interrupted FsShell copies may leave tmp files (Daryn Sharp + via tgraves) + + HADOOP-8703. distcpV2: turn CRC checking off for 0 byte size (Dave + Thompson via bobby) + + HADOOP-8390. TestFileSystemCanonicalization fails with JDK7 (Trevor + Robinson via tgraves) + + HADOOP-8692. TestLocalDirAllocator fails intermittently with JDK7 + (Trevor Robinson via tgraves) + + HADOOP-8693. TestSecurityUtil fails intermittently with JDK7 (Trevor + Robinson via tgraves) + + HADOOP-8697. TestWritableName fails intermittently with JDK7 (Trevor + Robinson via tgraves) + + HADOOP-8695. TestPathData fails intermittently with JDK7 (Trevor + Robinson via tgraves) + + HADOOP-8611. Allow fall-back to the shell-based implementation when + JNI-based users-group mapping fails (Robert Parker via bobby) + + HADOOP-8225. DistCp fails when invoked by Oozie (daryn via bobby) + + HADOOP-8709. globStatus changed behavior from 0.20/1.x (Jason Lowe via + bobby) + + HADOOP-8725. MR is broken when security is off (daryn via bobby) + + HADOOP-8726. The Secrets in Credentials are not available to MR tasks + (daryn and Benoy Antony via bobby) + + HADOOP-8727. 
Gracefully deprecate dfs.umaskmode in 2.x onwards (Harsh J + via bobby) + +Release 0.23.2 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + HADOOP-8048. Allow merging of Credentials (Daryn Sharp via tgraves) + + HADOOP-8032. mvn site:stage-deploy should be able to use the scp protocol + to stage documents (Ravi Prakash via tgraves) + + HADOOP-7923. Automate the updating of version numbers in the doc system. + (szetszwo) + + HADOOP-8137. Added links to CLI manuals to the site. (tgraves via + acmurthy) + + OPTIMIZATIONS + HADOOP-8071. Avoid an extra packet in client code when nagling is + disabled. (todd) + + HADOOP-6502. Improve the performance of Configuration.getClassByName when + the class is not found by caching negative results. + (sharad, todd via todd) + + BUG FIXES + + HADOOP-7660. Maven generated .classpath does not include + "target/generated-test-source/java" as source directory. + (Laxman via bobby) + + HADOOP-8042 When copying a file out of HDFS, modifying it, and uploading + it back into HDFS, the put fails due to a CRC mismatch + (Daryn Sharp via bobby) + + HADOOP-8035 Hadoop Maven site is inefficient and runs phases redundantly + (abayer via tucu) + + HADOOP-8051 HttpFS documentation it is not wired to the generated site (tucu) + + HADOOP-8055. Hadoop tarball distribution lacks a core-site.xml (harsh) + + HADOOP-8052. Hadoop Metrics2 should emit Float.MAX_VALUE (instead of + Double.MAX_VALUE) to avoid making Ganglia's gmetad core. (Varun Kapoor + via mattf) + + HADOOP-8074. Small bug in hadoop error message for unknown commands. + (Colin Patrick McCabe via eli) + + HADOOP-8082 add hadoop-client and hadoop-minicluster to the + dependency-management section. (tucu) + + HADOOP-8066 The full docs build intermittently fails (abayer via tucu) + + HADOOP-8083 javadoc generation for some modules is not done under target/ (tucu) + + HADOOP-8036. TestViewFsTrash assumes the user's home directory is + 2 levels deep. (Colin Patrick McCabe via eli) + + HADOOP-8046 Revert StaticMapping semantics to the existing ones, add DNS + mapping diagnostics in progress (stevel) + + HADOOP-8057 hadoop-setup-conf.sh not working because of some extra spaces. + (Vinayakumar B via stevel) + + HADOOP-7680 TestHardLink fails on Mac OS X, when gnu stat is in path. + (Milind Bhandarkar via stevel) + + HADOOP-8050. Deadlock in metrics. (Kihwal Lee via mattf) + + HADOOP-8131. FsShell put doesn't correctly handle a non-existent dir + (Daryn Sharp via bobby) + + HADOOP-8123. Use java.home rather than env.JAVA_HOME for java in the + project. (Jonathan Eagles via acmurthy) + + HADOOP-8064. Remove unnecessary dependency on w3c.org in document processing + (Kihwal Lee via bobby) + + HADOOP-8140. dfs -getmerge should process its arguments better (Daryn Sharp + via bobby) + + HADOOP-8164. Back slash as path separator is handled for Windows only. + (Daryn Sharp via suresh) + + HADOOP-8173. FsShell needs to handle quoted metachars. (Daryn Sharp via + szetszwo) + + HADOOP-8175. Add -p option to mkdir in FsShell. (Daryn Sharp via szetszwo) + + HADOOP-8176. Disambiguate the destination of FsShell copies (Daryn Sharp + via bobby) + + HADOOP-8208. Disallow self failover. (eli) + +Release 0.23.1 - 2012-02-17 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + HADOOP-7777 Implement a base class for DNSToSwitchMapping implementations + that can offer extra topology information. (stevel) + + HADOOP-7657. Add support for LZ4 compression. (Binglin Chang via todd) + + HADOOP-7910.
Add Configuration.getLongBytes to handle human readable byte size values. (Sho Shimauchi via harsh) + + + IMPROVEMENTS + + HADOOP-7801. HADOOP_PREFIX cannot be overriden. (Bruno Mahé via tomwhite) + + HADOOP-7802. Hadoop scripts unconditionally source + "$bin"/../libexec/hadoop-config.sh. (Bruno Mahé via tomwhite) + + HADOOP-7858. Drop some info logging to DEBUG level in IPC, + metrics, and HTTP. (todd via eli) + + HADOOP-7424. Log an error if the topology script doesn't handle multiple args. + (Uma Maheswara Rao G via eli) + + HADOOP-7804. Enable hadoop config generator to set configurations to enable + short circuit read. (Arpit Gupta via jitendra) + + HADOOP-7877. Update balancer CLI usage documentation to include the new + -policy option. (szetszwo) + + HADOOP-6840. Support non-recursive create() in FileSystem and + SequenceFile.Writer. (jitendra and eli via eli) + + HADOOP-6886. LocalFileSystem Needs createNonRecursive API. + (Nicolas Spiegelberg and eli via eli) + + HADOOP-7912. test-patch should run eclipse:eclipse to verify that it does + not break again. (Robert Joseph Evans via tomwhite) + + HADOOP-7890. Redirect hadoop script's deprecation message to stderr. + (Koji Knoguchi via mahadev) + + HADOOP-7504. Add the missing Ganglia31 opts to hadoop-metrics.properties as a comment. (harsh) + + HADOOP-7933. Add a getDelegationTokens api to FileSystem which checks + for known tokens in the passed Credentials object. (sseth) + + HADOOP-7737. normalize hadoop-mapreduce & hadoop-dist dist/tar build with + common/hdfs. (tucu) + + HADOOP-7743. Add Maven profile to create a full source tarball. (tucu) + + HADOOP-7758. Make GlobFilter class public. (tucu) + + HADOOP-7590. Mavenize streaming and MR examples. (tucu) + + HADOOP-7934. Normalize dependencies versions across all modules. (tucu) + + HADOOP-7348. Change 'addnl' in getmerge util to be a flag '-nl' instead. + (XieXianshan via harsh) + + HADOOP-7975. Add LZ4 as an entry in the default codec list, missed by HADOOP-7657 (harsh) + + HADOOP-4515. Configuration#getBoolean must not be case sensitive. (Sho Shimauchi via harsh) + + HADOOP-6490. Use StringUtils over String#replace in Path#normalizePath. + (Uma Maheswara Rao G via harsh) + + HADOOP-7574. Improve FSShell -stat, add user/group elements. + (XieXianshan via harsh) + + HADOOP-7736. Remove duplicate Path#normalizePath call. (harsh) + + HADOOP-7919. Remove the unused hadoop.logfile.* properties from the + core-default.xml file. (harsh) + + HADOOP-7939. Improve Hadoop subcomponent integration in Hadoop 0.23. (rvs via tucu) + + HADOOP-8002. SecurityUtil acquired token message should be a debug rather than info. + (Arpit Gupta via mahadev) + + HADOOP-8009. Create hadoop-client and hadoop-minicluster artifacts for downstream + projects. (tucu) + + HADOOP-7470. Move up to Jackson 1.8.8. (Enis Soztutar via szetszwo) + + HADOOP-8027. Visiting /jmx on the daemon web interfaces may print + unnecessary error in logs. (atm) + + HADOOP-7792. Add verifyToken method to AbstractDelegationTokenSecretManager. + (jitendra) + + OPTIMIZATIONS + + BUG FIXES + + HADOOP-7811. TestUserGroupInformation#testGetServerSideGroups test fails in chroot. + (Jonathan Eagles via mahadev) + + HADOOP-7813. Fix test-patch to use proper numerical comparison when checking + javadoc and findbugs warning counts. (Jonathan Eagles via tlipcon) + + HADOOP-7841. Run tests with non-secure random. (tlipcon) + + HADOOP-7851. Configuration.getClasses() never returns the default value. 
+ (Uma Maheswara Rao G via amarrk) + + HADOOP-7787. Make source tarball use conventional name. + (Bruno Mahé via tomwhite) + + HADOOP-6614. RunJar should provide more diags when it can't create + a temp file. (Jonathan Hsieh via eli) + + HADOOP-7859. TestViewFsHdfs.testgetFileLinkStatus is failing an assert. (eli) + + HADOOP-7864. Building mvn site with Maven < 3.0.2 causes OOM errors. + (Andrew Bayer via eli) + + HADOOP-7854. UGI getCurrentUser is not synchronized. (Daryn Sharp via jitendra) + + HADOOP-7870. fix SequenceFile#createWriter with boolean + createParent arg to respect createParent. (Jon Hsieh via eli) + + HADOOP-7898. Fix javadoc warnings in AuthenticationToken.java. (suresh) + + HADOOP-7878 Regression: HADOOP-7777 switch changes break HDFS tests when the + isSingleSwitch() predicate is used. (stevel) + + HADOOP-7914. Remove the duplicated declaration of hadoop-hdfs test-jar in + hadoop-project/pom.xml. (szetszwo) + + HADOOP-7837. no NullAppender in the log4j config. (eli) + + HADOOP-7948. Shell scripts created by hadoop-dist/pom.xml to build tar do not + properly propagate failure. (cim_michajlomatijkiw via tucu) + + HADOOP-7949. Updated maxIdleTime default in the code to match + core-default.xml (eli) + + HADOOP-7853. multiple javax security configurations cause conflicts. + (daryn via tucu) + + HDFS-2614. hadoop dist tarball is missing hdfs headers. (tucu) + + HADOOP-7874. native libs should be under lib/native/ dir. (tucu) + + HADOOP-7887. KerberosAuthenticatorHandler is not setting + KerberosName name rules from configuration. (tucu) + + HADOOP-7902. skipping name rules setting (if already set) should be done + on UGI initialization only. (tucu) + + HADOOP-7810. move hadoop archive to core from tools. (tucu) + + HADOOP_7917. compilation of protobuf files fails in windows/cygwin. (tucu) + + HADOOP-7907. hadoop-tools JARs are not part of the distro. (tucu) + + HADOOP-7936. There's a Hoop README in the root dir of the tarball. (tucu) + + HADOOP-7963. Fix ViewFS to catch a null canonical service-name and pass + tests TestViewFileSystem* (Siddharth Seth via vinodkv) + + HADOOP-7964. Deadlock in NetUtils and SecurityUtil class initialization. + (Daryn Sharp via suresh) + + HADOOP-7974. TestViewFsTrash incorrectly determines the user's home + directory. (harsh via eli) + + HADOOP-7971. Adding back job/pipes/queue commands to bin/hadoop for + backward compatibility. (Prashath Sharma via acmurthy) + + HADOOP-7982. UserGroupInformation fails to login if thread's context + classloader can't load HadoopLoginModule. (todd) + + HADOOP-7986. Adding config for MapReduce History Server protocol in + hadoop-policy.xml for service level authorization. (Mahadev Konar via vinodkv) + + HADOOP-7981. Improve documentation for org.apache.hadoop.io.compress. + Decompressor.getRemaining (Jonathan Eagles via mahadev) + + HADOOP-7997. SequenceFile.createWriter(...createParent...) no + longer works on existing file. (Gregory Chanan via eli) + + HADOOP-7993. Hadoop ignores old-style config options for enabling compressed + output. (Anupam Seth via mahadev) + + HADOOP-8000. fetchdt command not available in bin/hadoop. + (Arpit Gupta via mahadev) + + HADOOP-7999. "hadoop archive" fails with ClassNotFoundException. + (Jason Lowe via mahadev) + + HADOOP-8012. hadoop-daemon.sh and yarn-daemon.sh are trying to mkdir + and chown log/pid dirs which can fail. (Roman Shaposhnik via eli) + + HADOOP-8013. 
ViewFileSystem does not honor setVerifyChecksum + (Daryn Sharp via bobby) + + HADOOP-8054 NPE with FilterFileSystem (Daryn Sharp via bobby) + +Release 0.23.0 - 2011-11-01 + + INCOMPATIBLE CHANGES + + HADOOP-6904. Support method based RPC compatiblity. (hairong) + + HADOOP-6432. Add Statistics support in FileContext. (jitendra) + + HADOOP-7136. Remove failmon contrib component. (nigel) + + NEW FEATURES + + HADOOP-7324. Ganglia plugins for metrics v2. (Priyo Mustafi via llu) + + HADOOP-7342. Add an utility API in FileUtil for JDK File.list + avoid NPEs on File.list() (Bharath Mundlapudi via mattf) + + HADOOP-7322. Adding a util method in FileUtil for directory listing, + avoid NPEs on File.listFiles() (Bharath Mundlapudi via mattf) + + HADOOP-7023. Add listCorruptFileBlocks to Filesysem. (Patrick Kling + via hairong) + + HADOOP-7096. Allow setting of end-of-record delimiter for TextInputFormat + (Ahmed Radwan via todd) + + HADOOP-6994. Api to get delegation token in AbstractFileSystem. (jitendra) + + HADOOP-7171. Support UGI in FileContext API. (jitendra) + + HADOOP-7257 Client side mount tables (sanjay) + + HADOOP-6919. New metrics2 framework. (Luke Lu via acmurthy) + + HADOOP-6920. Metrics instrumentation to move new metrics2 framework. + (Luke Lu via suresh) + + HADOOP-7214. Add Common functionality necessary to provide an equivalent + of /usr/bin/groups for Hadoop. (Aaron T. Myers via todd) + + HADOOP-6832. Add an authentication plugin using a configurable static user + for the web UI. (Owen O'Malley and Todd Lipcon via cdouglas) + + HADOOP-7144. Expose JMX metrics via JSON servlet. (Robert Joseph Evans via + cdouglas) + + HADOOP-7379. Add the ability to serialize and deserialize protocol buffers + in ObjectWritable. (todd) + + HADOOP-7206. Support Snappy compression. (Issei Yoshida and + Alejandro Abdelnur via eli) + + HADOOP-7329. Add the capability of getting invividual attribute of a mbean + using JMXProxyServlet. (tanping) + + HADOOP-7380. Add client failover functionality to o.a.h.io.(ipc|retry). + (atm via eli) + + HADOOP-7460. Support pluggable trash policies. (Usman Masoon via suresh) + + HADOOP-6385. dfs should support -rmdir (was HDFS-639). (Daryn Sharp + via mattf) + + HADOOP-7119. add Kerberos HTTP SPNEGO authentication support to Hadoop + JT/NN/DN/TT web-consoles. (Alejandro Abdelnur via atm) + + IMPROVEMENTS + + HADOOP-7655. Provide a small validation script that smoke tests the installed + cluster. (Arpit Gupta via mattf) + + HADOOP-7042. Updates to test-patch.sh to include failed test names and + improve other messaging. (nigel) + + HADOOP-7001. Configuration changes can occur via the Reconfigurable + interface. (Patrick Kling via dhruba) + + HADOOP-6764. Add number of reader threads and queue length as + configuration parameters in RPC.getServer. (Dmytro Molkov via hairong) + + HADOOP-7049. TestReconfiguration should be junit v4. + (Patrick Kling via eli) + + HADOOP-7054 Change NN LoadGenerator to use FileContext APIs + (Sanjay Radia) + + HADOOP-7060. A more elegant FileSystem#listCorruptFileBlocks API. + (Patrick Kling via hairong) + + HADOOP-7058. Expose number of bytes in FSOutputSummer buffer to + implementatins. (Todd Lipcon via hairong) + + HADOOP-7061. unprecise javadoc for CompressionCodec. (Jingguo Yao via eli) + + HADOOP-7059. Remove "unused" warning in native code. (Noah Watkins via eli) + + HADOOP-6864. 
Provide a JNI-based implementation of + ShellBasedUnixGroupsNetgroupMapping + (implementation of GroupMappingServiceProvider) (Erik Seffl via boryas) + + HADOOP-7078. Improve javadocs for RawComparator interface. + (Harsh J Chouraria via todd) + + HADOOP-6995. Allow wildcards to be used in ProxyUsers configurations. + (todd) + + HADOOP-6376. Add a comment header to conf/slaves that specifies the file + format. (Kay Kay via todd) + + HADOOP-7151. Document need for stable hashCode() in WritableComparable. + (Dmitriy V. Ryaboy via todd) + + HADOOP-7112. Issue a warning when GenericOptionsParser libjars are not on + local filesystem. (tomwhite) + + HADOOP-7114. FsShell should dump all exceptions at DEBUG level. + (todd via tomwhite) + + HADOOP-7159. RPC server should log the client hostname when read exception + happened. (Scott Chen via todd) + + HADOOP-7167. Allow using a file to exclude certain tests from build. (todd) + + HADOOP-7133. Batch the calls in DataStorage to FileUtil.createHardLink(). + (Matt Foley via jghoman) + + HADOOP-7166. Add DaemonFactory to common. (Erik Steffl & jitendra) + + HADOOP-7175. Add isEnabled() to Trash. (Daryn Sharp via szetszwo) + + HADOOP-7180. Better support on CommandFormat on the API and exceptions. + (Daryn Sharp via szetszwo) + + HADOOP-7202. Improve shell Command base class. (Daryn Sharp via szetszwo) + + HADOOP-7224. Add CommandFactory to shell. (Daryn Sharp via szetszwo) + + HADOOP-7014. Generalize CLITest structure and interfaces to facilitate + upstream adoption (e.g. for web testing). (cos) + + HADOOP-7230. Move "fs -help" shell command tests from HDFS to COMMOM; see + also HDFS-1844. (Daryn Sharp via szetszwo) + + HADOOP-7233. Refactor ls to conform to new FsCommand class. (Daryn Sharp + via szetszwo) + + HADOOP-7235. Refactor the tail command to conform to new FsCommand class. + (Daryn Sharp via szetszwo) + + HADOOP-7179. Federation: Improve HDFS startup scripts. (Erik Steffl + and Tanping Wang via suresh) + + HADOOP-7227. Remove protocol version check at proxy creation in Hadoop + RPC. (jitendra) + + HADOOP-7236. Refactor the mkdir command to conform to new FsCommand class. + (Daryn Sharp via szetszwo) + + HADOOP-7250. Refactor the setrep command to conform to new FsCommand class. + (Daryn Sharp via szetszwo) + + HADOOP-7249. Refactor the chmod/chown/chgrp command to conform to new + FsCommand class. (Daryn Sharp via szetszwo) + + HADOOP-7251. Refactor the getmerge command to conform to new FsCommand + class. (Daryn Sharp via szetszwo) + + HADOOP-7265. Keep track of relative paths in PathData. (Daryn Sharp + via szetszwo) + + HADOOP-7238. Refactor the cat and text commands to conform to new FsCommand + class. (Daryn Sharp via szetszwo) + + HADOOP-7271. Standardize shell command error messages. (Daryn Sharp + via szetszwo) + + HADOOP-7272. Remove unnecessary security related info logs. (suresh) + + HADOOP-7275. Refactor the stat command to conform to new FsCommand + class. (Daryn Sharp via szetszwo) + + HADOOP-7237. Refactor the touchz command to conform to new FsCommand + class. (Daryn Sharp via szetszwo) + + HADOOP-7267. Refactor the rm/rmr/expunge commands to conform to new + FsCommand class. (Daryn Sharp via szetszwo) + + HADOOP-7285. Refactor the test command to conform to new FsCommand + class. (Daryn Sharp via todd) + + HADOOP-7289. In ivy.xml, test conf should not extend common conf. + (Eric Yang via szetszwo) + + HADOOP-7291. Update Hudson job not to run test-contrib. (Nigel Daley via eli) + + HADOOP-7286. 
Refactor the du/dus/df commands to conform to new FsCommand + class. (Daryn Sharp via todd) + + HADOOP-7301. FSDataInputStream should expose a getWrappedStream method. + (Jonathan Hsieh via eli) + + HADOOP-7306. Start metrics system even if config files are missing + (Luke Lu via todd) + + HADOOP-7302. webinterface.private.actions should be renamed and moved to + the MapReduce project. (Ari Rabkin via todd) + + HADOOP-7329. Improve help message for "df" to include "-h" flag. + (Xie Xianshan via todd) + + HADOOP-7320. Refactor the copy and move commands to conform to new + FsCommand class. (Daryn Sharp via todd) + + HADOOP-7312. Update value of hadoop.common.configuration.version. + (Harsh J Chouraria via todd) + + HADOOP-7337. Change PureJavaCrc32 annotations to public stable. (szetszwo) + + HADOOP-7331. Make hadoop-daemon.sh return exit code 1 if daemon processes + did not get started. (Tanping Wang via todd) + + HADOOP-7316. Add public javadocs to FSDataInputStream and + FSDataOutputStream. (eli) + + HADOOP-7323. Add capability to resolve compression codec based on codec + name. (Alejandro Abdelnur via tomwhite) + + HADOOP-1886. Undocumented parameters in FilesSystem. (Frank Conrad via eli) + + HADOOP-7375. Add resolvePath method to FileContext. (Sanjay Radia via eli) + + HADOOP-7383. HDFS needs to export protobuf library dependency in pom. + (todd via eli) + + HADOOP-7374. Don't add tools.jar to the classpath when running Hadoop. + (eli) + + HADOOP-7106. Reorganize project SVN layout to "unsplit" the projects. + (todd, nigel) + + HADOOP-6605. Add JAVA_HOME detection to hadoop-config. (eli) + + HADOOP-7384. Allow test-patch to be more flexible about patch format. (todd) + + HADOOP-6929. RPC should have a way to pass Security information other than + protocol annotations. (sharad and omalley via mahadev) + + HADOOP-7385. Remove StringUtils.stringifyException(ie) in logger functions. + (Bharath Mundlapudi via Tanping Wang). + + HADOOP-310. Additional constructor requested in BytesWritable. (Brock + Noland via atm) + + HADOOP-7429. Add another IOUtils#copyBytes method. (eli) + + HADOOP-7451. Generalize StringUtils#join. (Chris Douglas via mattf) + + HADOOP-7449. Add Data(In,Out)putByteBuffer to work with ByteBuffer similar + to Data(In,Out)putBuffer for byte[]. Merge from yahoo-merge branch, + -r 1079163. Fix missing Apache license headers. (Chris Douglas via mattf) + + HADOOP-7361. Provide an option, -overwrite/-f, in put and copyFromLocal + shell commands. (Uma Maheswara Rao G via szetszwo) + + HADOOP-7430. Improve error message when moving to trash fails due to + quota issue. (Ravi Prakash via mattf) + + HADOOP-7444. Add Checksum API to verify and calculate checksums "in bulk" + (todd) + + HADOOP-7443. Add CRC32C as another DataChecksum implementation (todd) + + HADOOP-7305. Eclipse project files are incomplete. (Niels Basjes via eli) + + HADOOP-7314. Add support for throwing UnknownHostException when a host doesn't + resolve. (Jeffrey Naisbitt via jitendra) + + HADOOP-7465. A several tiny improvements for the LOG format. + (Xie Xianshan via eli) + + HADOOP-7434. Display error when using "daemonlog -setlevel" with + illegal level. (yanjinshuang via eli) + + HADOOP-7463. Adding a configuration parameter to SecurityInfo interface. + (mahadev) + + HADOOP-7298. Add test utility for writing multi-threaded tests. (todd and + Harsh J Chouraria via todd) + + HADOOP-7485. Add -h option to ls to list file sizes in human readable + format. (XieXianshan via suresh) + + HADOOP-7378. 
Add -d option to ls to not expand directories. + (Daryn Sharp via suresh) + + HADOOP-7474. Refactor ClientCache out of WritableRpcEngine. (jitendra) + + HADOOP-7491. hadoop command should respect HADOOP_OPTS when given + a class name. (eli) + + HADOOP-7178. Add a parameter, useRawLocalFileSystem, to copyToLocalFile(..) + in FileSystem. (Uma Maheswara Rao G via szetszwo) + + HADOOP-6671. Use maven for hadoop common builds. (Alejandro Abdelnur + via tomwhite) + + HADOOP-7502. Make generated sources IDE friendly. + (Alejandro Abdelnur via llu) + + HADOOP-7501. Publish Hadoop Common artifacts (post HADOOP-6671) to Apache + SNAPSHOTs repo. (Alejandro Abdelnur via tomwhite) + + HADOOP-7525. Make arguments to test-patch optional. (tomwhite) + + HADOOP-7472. RPC client should deal with IP address change. + (Kihwal Lee via suresh) + + HADOOP-7499. Add method for doing a sanity check on hostnames in NetUtils. + (Jeffrey Naisbit via mahadev) + + HADOOP-6158. Move CyclicIteration to HDFS. (eli) + + HADOOP-7526. Add TestPath tests for URI conversion and reserved + characters. (eli) + + HADOOP-7531. Add servlet util methods for handling paths in requests. (eli) + + HADOOP-7493. Add ShortWritable. (Uma Maheswara Rao G via szetszwo) + + HADOOP-7555. Add a eclipse-generated files to .gitignore. (atm) + + HADOOP-7264. Bump avro version to at least 1.4.1. (Alejandro Abdelnur via + tomwhite) + + HADOOP-7498. Remove legacy TAR layout creation. (Alejandro Abdelnur via + tomwhite) + + HADOOP-7496. Break Maven TAR & bintar profiles into just LAYOUT & TAR proper. + (Alejandro Abdelnur via tomwhite) + + HADOOP-7561. Make test-patch only run tests for changed modules. (tomwhite) + + HADOOP-7547. Add generic type in WritableComparable subclasses. + (Uma Maheswara Rao G via szetszwo) + + HADOOP-7579. Rename package names from alfredo to auth. + (Alejandro Abdelnur via szetszwo) + + HADOOP-7594. Support HTTP REST in HttpServer. (szetszwo) + + HADOOP-7552. FileUtil#fullyDelete doesn't throw IOE but lists it + in the throws clause. (eli) + + HADOOP-7580. Add a version of getLocalPathForWrite to LocalDirAllocator + which doesn't create dirs. (Chris Douglas & Siddharth Seth via acmurthy) + + HADOOP-7507. Allow ganglia metrics to include the metrics system tags + in the gmetric names. (Alejandro Abdelnur via todd) + + HADOOP-7612. Change test-patch to run tests for all nested modules. + (tomwhite) + + HADOOP-7599. Script improvements to setup a secure Hadoop cluster + (Eric Yang via ddas) + + HADOOP-7639. Enhance HttpServer to allow passing path-specs for filtering, + so that servers like Yarn WebApp can get filtered the paths served by + their own injected servlets. (Thomas Graves via vinodkv) + + HADOOP-7575. Enhanced LocalDirAllocator to support fully-qualified + paths. (Jonathan Eagles via vinodkv) + + HADOOP-7469 Add a standard handler for socket connection problems which + improves diagnostics (Uma Maheswara Rao G and stevel via stevel) + + HADOOP-7710. Added hadoop-setup-application.sh for creating + application directory (Arpit Gupta via Eric Yang) + + HADOOP-7707. Added toggle for dfs.support.append, webhdfs and hadoop proxy + user to setup config script. (Arpit Gupta via Eric Yang) + + HADOOP-7720. Added parameter for HBase user to setup config script. + (Arpit Gupta via Eric Yang) + + HADOOP-7624. Set things up for a top level hadoop-tools module. (tucu) + + HADOOP-7627. Improve MetricsAsserts to give more understandable output + on failure. (todd) + + HADOOP-7642. 
create hadoop-dist module where TAR stitching would happen. + (Thomas White via tucu) + + HADOOP-7709. Running a set of methods in a Single Test Class. + (Jonathan Eagles via mahadev) + + HADOOP-7705. Add a log4j back end that can push out JSON data, + one per line. (stevel) + + HADOOP-7749. Add a NetUtils createSocketAddr call which provides more + help in exception messages. (todd) + + HADOOP-7762. Common side of MR-2736. (eli) + + HADOOP-7668. Add a NetUtils method that can tell if an InetAddress + belongs to local host. (suresh) + + HADOOP-7509. Improve exception message thrown when Authentication is + required. (Ravi Prakash via suresh) + + HADOOP-7745. Fix wrong variable name in exception message introduced + in HADOOP-7509. (Ravi Prakash via suresh) + + MAPREDUCE-2764. Fix renewal of dfs delegation tokens. (Owen via jitendra) + + HADOOP-7360. Preserve relative paths that do not contain globs in FsShell. + (Daryn Sharp and Kihwal Lee via szetszwo) + + HADOOP-7771. FsShell -copyToLocal, -get, etc. commands throw NPE if the + destination directory does not exist. (John George and Daryn Sharp + via szetszwo) + + HADOOP-7782. Aggregate project javadocs. (tomwhite) + + HADOOP-7789. Improvements to site navigation. (acmurthy) + + OPTIMIZATIONS + + HADOOP-7333. Performance improvement in PureJavaCrc32. (Eric Caspole + via todd) + + HADOOP-7445. Implement bulk checksum verification using efficient native + code. (todd) + + HADOOP-7753. Support fadvise and sync_file_range in NativeIO. Add + ReadaheadPool infrastructure for use in HDFS and MR. (todd) + + HADOOP-7446. Implement CRC32C native code using SSE4.2 instructions. + (Kihwal Lee and todd via todd) + + HADOOP-7763. Add top-level navigation to APT docs. (tomwhite) + + HADOOP-7785. Add equals, hashcode, toString to DataChecksum (todd) + + BUG FIXES + + HADOOP-7740. Fixed security audit logger configuration. (Arpit Gupta via Eric Yang) + + HADOOP-7630. hadoop-metrics2.properties should have a property *.period + set to a default value for metrics. (Eric Yang via mattf) + + HADOOP-7327. FileSystem.listStatus() throws NullPointerException instead of + IOException upon access permission failure. (mattf) + + HADOOP-7015. RawLocalFileSystem#listStatus does not deal with a directory + whose entries are changing (e.g. in a multi-thread or multi-process + environment). (Sanjay Radia via eli) + + HADOOP-7045. TestDU fails on systems with local file systems with + extended attributes. (eli) + + HADOOP-6939. Inconsistent lock ordering in + AbstractDelegationTokenSecretManager. (Todd Lipcon via tomwhite) + + HADOOP-7129. Fix typo in method name getProtocolSigature (todd) + + HADOOP-7048. Wrong description of Block-Compressed SequenceFile Format in + SequenceFile's javadoc. (Jingguo Yao via tomwhite) + + HADOOP-7153. MapWritable violates contract of Map interface for equals() + and hashCode(). (Nicholas Telford via todd) + + HADOOP-6754. DefaultCodec.createOutputStream() leaks memory. + (Aaron Kimball via tomwhite) + + HADOOP-7098. Tasktracker property not set in conf/hadoop-env.sh. + (Bernd Fondermann via tomwhite) + + HADOOP-7131. Exceptions thrown by Text methods should include the causing + exception. (Uma Maheswara Rao G via todd) + + HADOOP-6912. Guard against NPE when calling UGI.isLoginKeytabBased(). + (Kan Zhang via jitendra) + + HADOOP-7204. remove local unused fs variable from CmdHandler + and FsShellPermissions.changePermissions (boryas) + + HADOOP-7210. 
Chown command is not working from FSShell + (Uma Maheswara Rao G via todd) + + HADOOP-7215. RPC clients must use network interface corresponding to + the host in the client's kerberos principal key. (suresh) + + HADOOP-7019. Refactor build targets to enable faster cross project dev + cycles. (Luke Lu via cos) + + HADOOP-7216. Add FsCommand.runAll() with deprecated annotation for the + transition of Command base class improvement. (Daryn Sharp via szetszwo) + + HADOOP-7207. fs member of FSShell is not really needed (boryas) + + HADOOP-7223. FileContext createFlag combinations are not clearly defined. + (suresh) + + HADOOP-7231. Fix synopsis for -count. (Daryn Sharp via eli). + + HADOOP-7261. Disable IPV6 for junit tests. (suresh) + + HADOOP-7268. FileContext.getLocalFSFileContext() behavior needs to be fixed + w.r.t tokens. (jitendra) + + HADOOP-7290. Unit test failure in + TestUserGroupInformation.testGetServerSideGroups. (Trevor Robison via eli) + + HADOOP-7292. Fix racy test case TestSinkQueue. (Luke Lu via todd) + + HADOOP-7282. ipc.Server.getRemoteIp() may return null. (John George + via szetszwo) + + HADOOP-7208. Fix implementation of equals() and hashCode() in + StandardSocketFactory. (Uma Maheswara Rao G via todd) + + HADOOP-7336. TestFileContextResolveAfs will fail with default + test.build.data property. (jitendra) + + HADOOP-7284 Trash and shell's rm does not work for viewfs (Sanjay Radia) + + HADOOP-7341. Fix options parsing in CommandFormat (Daryn Sharp via todd) + + HADOOP-7353. Cleanup FsShell and prevent masking of RTE stack traces. + (Daryn Sharp via todd) + + HADOOP-7356. RPM packages broke bin/hadoop script in developer environment. + (Eric Yang via todd) + + HADOOP-7389. Use of TestingGroups by tests causes subsequent tests to fail. + (atm via tomwhite) + + HADOOP-7377. Fix command name handling affecting DFSAdmin. (Daryn Sharp + via mattf) + + HADOOP-7402. TestConfiguration doesn't clean up after itself. (atm via eli) + + HADOOP-7428. IPC connection is orphaned with null 'out' member. + (todd via eli) + + HADOOP-7437. IOUtils.copybytes will suppress the stream closure exceptions. + (Uma Maheswara Rao G via szetszwo) + + HADOOP-7090. Fix resource leaks in s3.INode, BloomMapFile, WritableUtils + and CBZip2OutputStream. (Uma Maheswara Rao G via szetszwo) + + HADOOP-7440. HttpServer.getParameterValues throws NPE for missing + parameters. (Uma Maheswara Rao G and todd via todd) + + HADOOP-7442. Docs in core-default.xml still reference deprecated config + "topology.script.file.name" (atm) + + HADOOP-7419. new hadoop-config.sh doesn't manage classpath for + HADOOP_CONF_DIR correctly. (Bing Zheng and todd via todd) + + HADOOP-7448. merge from yahoo-merge branch (via mattf): + -r 1079157: Fix content type for /stacks servlet to be + plain text (Luke Lu) + -r 1079164: No need to escape plain text (Luke Lu) + + HADOOP-7471. The saveVersion.sh script sometimes fails to extract SVN URL. + (Alejandro Abdelnur via eli) + + HADOOP-2081. Configuration getInt, getLong, and getFloat replace + invalid numbers with the default value. (Harsh J via eli) + + HADOOP-7111. Several TFile tests failing when native libraries are + present. (atm) + + HADOOP-7438. Fix deprecated warnings from hadoop-daemon.sh script. + (Ravi Prakash via suresh) + + HADOOP-7468 hadoop-core JAR contains a log4j.properties file. + (Jolly Chen) + + HADOOP-7508. Compiled nativelib is in wrong directory and it is not picked + up by surefire setup. (Alejandro Abdelnur via tomwhite) + + HADOOP-7520. 
Fix to add distribution management info to hadoop-main + (Alejandro Abdelnur via gkesavan) + + HADOOP-7515. test-patch reports the wrong number of javadoc warnings. + (tomwhite) + + HADOOP-7523. Test org.apache.hadoop.fs.TestFilterFileSystem fails due to + java.lang.NoSuchMethodException. (John Lee via tomwhite) + + HADOOP-7528. Maven build fails in Windows. (Alejandro Abdelnur via + tomwhite) + + HADOOP-7533. Allow test-patch to be run from any subproject directory. + (tomwhite) + + HADOOP-7512. Fix example mistake in WritableComparable javadocs. + (Harsh J via eli) + + HADOOP-7357. hadoop.io.compress.TestCodec#main() should exit with + non-zero exit code if test failed. (Philip Zeyliger via eli) + + HADOOP-6622. Token should not print the password in toString. (eli) + + HADOOP-7529. Fix lock cycles in metrics system. (llu) + + HADOOP-7545. Common -tests JAR should not include properties and configs. + (todd) + + HADOOP-7536. Correct the dependency version regressions introduced in + HADOOP-6671. (Alejandro Abdelnur via tomwhite) + + HADOOP-7566. MR tests are failing webapps/hdfs not found in CLASSPATH. + (Alejandro Abdelnur via mahadev) + + HADOOP-7567. 'mvn eclipse:eclipse' fails for hadoop-alfredo (auth). + (Alejandro Abdelnur via tomwhite) + + HADOOP-7563. Setup HADOOP_HDFS_HOME, HADOOP_MAPRED_HOME and classpath + correction. (Eric Yang via acmurthy) + + HADOOP-7560. Change src layout to be heirarchical. (Alejandro Abdelnur + via acmurthy) + + HADOOP-7576. Fix findbugs warnings and javac warnings in hadoop-auth. + (szetszwo) + + HADOOP-7593. Fix AssertionError in TestHttpServer.testMaxThreads(). + (Uma Maheswara Rao G via szetszwo) + + HADOOP-7598. Fix smart-apply-patch.sh to handle patching from a sub + directory correctly. (Robert Evans via acmurthy) + + HADOOP-7328. When a serializer class is missing, return null, not throw + an NPE. (Harsh J Chouraria via todd) + + HADOOP-7626. Bugfix for a config generator (Eric Yang via ddas) + + HADOOP-7629. Allow immutable FsPermission objects to be used as IPC + parameters. (todd) + + HADOOP-7608. SnappyCodec check for Hadoop native lib is wrong + (Alejandro Abdelnur via todd) + + HADOOP-7637. Fix to include FairScheduler configuration file in + RPM. (Eric Yang via ddas) + + HADOOP-7633. Adds log4j.properties to the hadoop-conf dir on + deploy (Eric Yang via ddas) + + HADOOP-7631. Fixes a config problem to do with running streaming jobs + (Eric Yang via ddas) + + HADOOP-7662. Fixed logs servlet to use the pathspec '/*' instead of '/' + for correct filtering. (Thomas Graves via vinodkv) + + HADOOP-7691. Fixed conflict uid for install packages. (Eric Yang) + + HADOOP-7603. Set hdfs, mapred uid, and hadoop uid to fixed numbers. + (Eric Yang) + + HADOOP-7658. Fixed HADOOP_SECURE_DN_USER environment variable in + hadoop-evn.sh (Eric Yang) + + HADOOP-7684. Added init.d script for jobhistory server and + secondary namenode. (Eric Yang) + + HADOOP-7715. Removed unnecessary security logger configuration. (Eric Yang) + + HADOOP-7685. Improved directory ownership check function in + hadoop-setup-conf.sh. (Eric Yang) + + HADOOP-7711. Fixed recursive sourcing of HADOOP_OPTS environment + variables (Arpit Gupta via Eric Yang) + + HADOOP-7681. Fixed security and hdfs audit log4j properties + (Arpit Gupta via Eric Yang) + + HADOOP-7708. Fixed hadoop-setup-conf.sh to handle config files + consistently. (Eric Yang) + + HADOOP-7724. Fixed hadoop-setup-conf.sh to put proxy user in + core-site.xml. (Arpit Gupta via Eric Yang) + + HADOOP-7755. 
Detect MapReduce PreCommit Trunk builds silently failing + when running test-patch.sh. (Jonathan Eagles via tomwhite) + + HADOOP-7744. Ensure failed tests exit with proper error code. (Jonathan + Eagles via acmurthy) + + HADOOP-7764. Allow HttpServer to set both ACL list and path spec filters. + (Jonathan Eagles via acmurthy) + + HADOOP-7766. The auth to local mappings are not being respected, with webhdfs + and security enabled. (jitendra) + + HADOOP-7721. Add log before login in KerberosAuthenticationHandler. + (jitendra) + + HADOOP-7778. FindBugs warning in Token.getKind(). (tomwhite) + + HADOOP-7798. Add support gpg signatures for maven release artifacts. + (cutting via acmurthy) + + HADOOP-7797. Fix top-level pom.xml to refer to correct staging maven + repository. (omalley via acmurthy) + + HADOOP-7101. UserGroupInformation.getCurrentUser() fails when called from + non-Hadoop JAAS context. (todd) + +Release 0.22.1 - Unreleased + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + HADOOP-7937. Forward port SequenceFile#syncFs and friends from Hadoop 1.x. + (tomwhite) + +Release 0.22.0 - 2011-11-29 + + INCOMPATIBLE CHANGES + + HADOOP-7137. Remove hod contrib. (nigel via eli) + + NEW FEATURES + + HADOOP-6791. Refresh for proxy superuser config + (common part for HDFS-1096) (boryas) + + HADOOP-6581. Add authenticated TokenIdentifiers to UGI so that + they can be used for authorization (Kan Zhang and Jitendra Pandey + via jghoman) + + HADOOP-6584. Provide Kerberized SSL encryption for webservices. + (jghoman and Kan Zhang via jghoman) + + HADOOP-6853. Common component of HDFS-1045. (jghoman) + + HADOOP-6859 - Introduce additional statistics to FileSystem to track + file system operations (suresh) + + HADOOP-6870. Add a new API getFiles to FileSystem and FileContext that + lists all files under the input path or the subtree rooted at the + input path if recursive is true. Block locations are returned together + with each file's status. (hairong) + + HADOOP-6888. Add a new FileSystem API closeAllForUGI(..) for closing all + file systems associated with a particular UGI. (Devaraj Das and Kan Zhang + via szetszwo) + + HADOOP-6892. Common component of HDFS-1150 (Verify datanodes' identities + to clients in secure clusters) (jghoman) + + HADOOP-6889. Make RPC to have an option to timeout. (hairong) + + HADOOP-6996. Allow CodecFactory to return a codec object given a codec' + class name. (hairong) + + HADOOP-7013. Add boolean field isCorrupt to BlockLocation. + (Patrick Kling via hairong) + + HADOOP-6978. Adds support for NativeIO using JNI. + (Todd Lipcon, Devaraj Das & Owen O'Malley via ddas) + + HADOOP-7134. configure files that are generated as part of the released + tarball need to have executable bit set. (Roman Shaposhnik via cos) + + IMPROVEMENTS + + HADOOP-6644. util.Shell getGROUPS_FOR_USER_COMMAND method name + - should use common naming convention (boryas) + + HADOOP-6778. add isRunning() method to + AbstractDelegationTokenSecretManager (for HDFS-1044) (boryas) + + HADOOP-6633. normalize property names for JT/NN kerberos principal + names in configuration (boryas) + + HADOOP-6627. "Bad Connection to FS" message in FSShell should print + message from the exception (boryas) + + HADOOP-6600. mechanism for authorization check for inter-server + protocols. (boryas) + + HADOOP-6623. Add StringUtils.split for non-escaped single-character + separator. (Todd Lipcon via tomwhite) + + HADOOP-6761. The Trash Emptier has the ability to run more frequently. 
+ (Dmytro Molkov via dhruba) + + HADOOP-6714. Resolve compressed files using CodecFactory in FsShell::text. + (Patrick Angeles via cdouglas) + + HADOOP-6661. User document for UserGroupInformation.doAs. + (Jitendra Pandey via jghoman) + + HADOOP-6674. Makes use of the SASL authentication options in the + SASL RPC. (Jitendra Pandey via ddas) + + HADOOP-6526. Need mapping from long principal names to local OS + user names. (boryas) + + HADOOP-6814. Adds an API in UserGroupInformation to get the real + authentication method of a passed UGI. (Jitendra Pandey via ddas) + + HADOOP-6756. Documentation for common configuration keys. + (Erik Steffl via shv) + + HADOOP-6835. Add support for concatenated gzip input. (Greg Roelofs via + cdouglas) + + HADOOP-6845. Renames the TokenStorage class to Credentials. + (Jitendra Pandey via ddas) + + HADOOP-6826. FileStatus needs unit tests. (Rodrigo Schmidt via Eli + Collins) + + HADOOP-6905. add buildDTServiceName method to SecurityUtil + (as part of MAPREDUCE-1718) (boryas) + + HADOOP-6632. Adds support for using different keytabs for different + servers in a Hadoop cluster. In the earier implementation, all servers + of a certain type (like TaskTracker), would have the same keytab and the + same principal. Now the principal name is a pattern that has _HOST in it. + (Kan Zhang & Jitendra Pandey via ddas) + + HADOOP-6861. Adds new non-static methods in Credentials to read and + write token storage file. (Jitendra Pandey & Owen O'Malley via ddas) + + HADOOP-6877. Common part of HDFS-1178 (NameNode servlets should communicate + with NameNode directrly). (Kan Zhang via jghoman) + + HADOOP-6475. Adding some javadoc to Server.RpcMetrics, UGI. + (Jitendra Pandey and borya via jghoman) + + HADOOP-6656. Adds a thread in the UserGroupInformation to renew TGTs + periodically. (Owen O'Malley and ddas via ddas) + + HADOOP-6890. Improve listFiles API introduced by HADOOP-6870. (hairong) + + HADOOP-6862. Adds api to add/remove user and group to AccessControlList + (amareshwari) + + HADOOP-6911. doc update for DelegationTokenFetcher (boryas) + + HADOOP-6900. Make the iterator returned by FileSystem#listLocatedStatus to + throw IOException rather than RuntimeException when there is an IO error + fetching the next file. (hairong) + + HADOOP-6905. Better logging messages when a delegation token is invalid. + (Kan Zhang via jghoman) + + HADOOP-6693. Add metrics to track kerberol login activity. (suresh) + + HADOOP-6803. Add native gzip read/write coverage to TestCodec. + (Eli Collins via tomwhite) + + HADOOP-6950. Suggest that HADOOP_CLASSPATH should be preserved in + hadoop-env.sh.template. (Philip Zeyliger via Eli Collins) + + HADOOP-6922. Make AccessControlList a writable and update documentation + for Job ACLs. (Ravi Gummadi via vinodkv) + + HADOOP-6965. Introduces checks for whether the original tgt is valid + in the reloginFromKeytab method. + + HADOOP-6856. Simplify constructors for SequenceFile, and MapFile. (omalley) + + HADOOP-6987. Use JUnit Rule to optionally fail test cases that run more + than 10 seconds (jghoman) + + HADOOP-7005. Update test-patch.sh to remove callback to Hudson. (nigel) + + HADOOP-6985. Suggest that HADOOP_OPTS be preserved in + hadoop-env.sh.template. (Ramkumar Vadali via cutting) + + HADOOP-7007. Update the hudson-test-patch ant target to work with the + latest test-patch.sh script (gkesavan) + + HADOOP-7010. Typo in FileSystem.java. (Jingguo Yao via eli) + + HADOOP-7009. 
MD5Hash provides a public factory method that creates an + instance of thread local MessageDigest. (hairong) + + HADOOP-7008. Enable test-patch.sh to have a configured number of + acceptable findbugs and javadoc warnings. (nigel and gkesavan) + + HADOOP-6818. Provides a JNI implementation of group resolution. (ddas) + + HADOOP-6943. The GroupMappingServiceProvider interface should be public. + (Aaron T. Myers via tomwhite) + + HADOOP-4675. Current Ganglia metrics implementation is incompatible with + Ganglia 3.1. (Brian Bockelman via tomwhite) + + HADOOP-6977. Herriot daemon clients should vend statistics (cos) + + HADOOP-7024. Create a test method for adding file systems during tests. + (Kan Zhang via jghoman) + + HADOOP-6903. Make AbstractFSileSystem methods and some FileContext methods + to be public. (Sanjay Radia) + + HADOOP-7034. Add TestPath tests to cover dot, dot dot, and slash + normalization. (eli) + + HADOOP-7032. Assert type constraints in the FileStatus constructor. (eli) + + HADOOP-6562. FileContextSymlinkBaseTest should use FileContextTestHelper. + (eli) + + HADOOP-7028. ant eclipse does not include requisite ant.jar in the + classpath. (Patrick Angeles via eli) + + HADOOP-6298. Add copyBytes to Text and BytesWritable. (omalley) + + HADOOP-6578. Configuration should trim whitespace around a lot of value + types. (Michele Catasta via eli) + + HADOOP-6811. Remove EC2 bash scripts. They are replaced by Apache Whirr + (incubating, http://incubator.apache.org/whirr). (tomwhite) + + HADOOP-7102. Remove "fs.ramfs.impl" field from core-deafult.xml (shv) + + HADOOP-7104. Remove unnecessary DNS reverse lookups from RPC layer + (Kan Zhang via todd) + + HADOOP-6056. Use java.net.preferIPv4Stack to force IPv4. + (Michele Catasta via shv) + + HADOOP-7110. Implement chmod with JNI. (todd) + + HADOOP-6812. Change documentation for correct placement of configuration + variables: mapreduce.reduce.input.buffer.percent, + mapreduce.task.io.sort.factor, mapreduce.task.io.sort.mb + (Chris Douglas via shv) + + HADOOP-6436. Remove auto-generated native build files. (rvs via eli) + + HADOOP-6970. SecurityAuth.audit should be generated under /build. (boryas) + + HADOOP-7154. Should set MALLOC_ARENA_MAX in hadoop-env.sh (todd) + + HADOOP-7187. Fix socket leak in GangliaContext. (Uma Maheswara Rao G + via szetszwo) + + HADOOP-7241. fix typo of command 'hadoop fs -help tail'. + (Wei Yongjun via eli) + + HADOOP-7244. Documentation change for updated configuration keys. + (tomwhite via eli) + + HADOOP-7189. Add ability to enable 'debug' property in JAAS configuration. + (Ted Yu via todd) + + HADOOP-7192. Update fs -stat docs to reflect the format features. (Harsh + J Chouraria via todd) + + HADOOP-7355 Add audience and stability annotations to HttpServer class + (stack) + + HADOOP-7346. Send back nicer error message to clients using outdated IPC + version. (todd) + + HADOOP-7335. Force entropy to come from non-true random for tests. + (todd via eli) + + HADOOP-7325. The hadoop command should not accept class names starting with + a hyphen. (Brock Noland via todd) + + HADOOP-7772. javadoc the topology classes (stevel) + + HADOOP-7786. Remove HDFS-specific config keys defined in FsConfig. (eli) + + HADOOP-7861. changes2html.pl generates links to HADOOP, HDFS, and MAPREDUCE + jiras. (shv) + + OPTIMIZATIONS + + HADOOP-6884. Add LOG.isDebugEnabled() guard for each LOG.debug(..). + (Erik Steffl via szetszwo) + + HADOOP-6683. ZlibCompressor does not fully utilize the buffer. 
+ (Kang Xiao via eli) + + HADOOP-6949. Reduce RPC packet size of primitive arrays using + ArrayPrimitiveWritable instead of ObjectWritable. (Matt Foley via suresh) + + BUG FIXES + + HADOOP-6638. try to relogin in a case of failed RPC connection (expired + tgt) only in case the subject is loginUser or proxyUgi.realUser. (boryas) + + HADOOP-6781. security audit log shouldn't have exception in it. (boryas) + + HADOOP-6612. Protocols RefreshUserToGroupMappingsProtocol and + RefreshAuthorizationPolicyProtocol will fail with security enabled (boryas) + + HADOOP-6764. Remove verbose logging from the Groups class. (Boris Shkolnik) + + HADOOP-6730. Bug in FileContext#copy and provide base class for + FileContext tests. (Ravi Phulari via jghoman) + + HADOOP-6669. Respect compression configuration when creating DefaultCodec + instances. (Koji Noguchi via cdouglas) + + HADOOP-6747. TestNetUtils fails on Mac OS X. (Todd Lipcon via jghoman) + + HADOOP-6787. Factor out glob pattern code from FileContext and + Filesystem. Also fix bugs identified in HADOOP-6618 and make the + glob pattern code less restrictive and more POSIX standard + compliant. (Luke Lu via eli) + + HADOOP-6649. login object in UGI should be inside the subject (jnp via + boryas) + + HADOOP-6687. user object in the subject in UGI should be reused in case + of a relogin. (jnp via boryas) + + HADOOP-6603. Provide workaround for issue with Kerberos not resolving + cross-realm principal (Kan Zhang and Jitendra Pandey via jghoman) + + HADOOP-6620. NPE if renewer is passed as null in getDelegationToken. + (Jitendra Pandey via jghoman) + + HADOOP-6613. Moves the RPC version check ahead of the AuthMethod check. + (Kan Zhang via ddas) + + HADOOP-6682. NetUtils:normalizeHostName does not process hostnames starting + with [a-f] correctly. (jghoman) + + HADOOP-6652. Removes the unnecessary cache from + ShellBasedUnixGroupsMapping. (ddas) + + HADOOP-6815. refreshSuperUserGroupsConfiguration should use server side + configuration for the refresh (boryas) + + HADOOP-6648. Adds a check for null tokens in Credentials.addToken api. + (ddas) + + HADOOP-6647. balancer fails with "is not authorized for protocol + interface NamenodeProtocol" in secure environment (boryas) + + HADOOP-6834. TFile.append compares initial key against null lastKey + (hong tang via mahadev) + + HADOOP-6670. Use the UserGroupInformation's Subject as the criteria for + equals and hashCode. (Owen O'Malley and Kan Zhang via ddas) + + HADOOP-6536. Fixes FileUtil.fullyDelete() not to delete the contents of + the sym-linked directory. (Ravi Gummadi via amareshwari) + + HADOOP-6873. using delegation token over hftp for long + running clients (boryas) + + HADOOP-6706. Improves the sasl failure handling due to expired tickets, + and other server detected failures. (Jitendra Pandey and ddas via ddas) + + HADOOP-6715. Fixes AccessControlList.toString() to return a descriptive + String representation of the ACL. (Ravi Gummadi via amareshwari) + + HADOOP-6885. Fix java doc warnings in Groups and + RefreshUserMappingsProtocol. (Eli Collins via jghoman) + + HADOOP-6482. GenericOptionsParser constructor that takes Options and + String[] ignores options. (Eli Collins via jghoman) + + HADOOP-6906. FileContext copy() utility doesn't work with recursive + copying of directories. (vinod k v via mahadev) + + HADOOP-6453. Hadoop wrapper script shouldn't ignore an existing + JAVA_LIBRARY_PATH. (Chad Metcalf via jghoman) + + HADOOP-6932. 
Namenode start (init) fails because of invalid kerberos + key, even when security set to "simple" (boryas) + + HADOOP-6913. Circular initialization between UserGroupInformation and + KerberosName (Kan Zhang via boryas) + + HADOOP-6907. Rpc client doesn't use the per-connection conf to figure + out server's Kerberos principal (Kan Zhang via hairong) + + HADOOP-6938. ConnectionId.getRemotePrincipal() should check if security + is enabled. (Kan Zhang via hairong) + + HADOOP-6930. AvroRpcEngine doesn't work with generated Avro code. + (sharad) + + HADOOP-6940. RawLocalFileSystem's markSupported method misnamed + markSupport. (Tom White via eli). + + HADOOP-6951. Distinct minicluster services (e.g. NN and JT) overwrite each + other's service policies. (Aaron T. Myers via tomwhite) + + HADOOP-6879. Provide SSH based (Jsch) remote execution API for system + tests (cos) + + HADOOP-6989. Correct the parameter for SetFile to set the value type + for SetFile to be NullWritable instead of the key. (cdouglas via omalley) + + HADOOP-6984. Combine the compress kind and the codec in the same option + for SequenceFiles. (cdouglas via omalley) + + HADOOP-6933. TestListFiles is flaky. (Todd Lipcon via tomwhite) + + HADOOP-6947. Kerberos relogin should set refreshKrb5Config to true. + (Todd Lipcon via tomwhite) + + HADOOP-7006. Fix 'fs -getmerge' command to not be a no-op. + (Chris Nauroth via cutting) + + HADOOP-6663. BlockDecompressorStream get EOF exception when decompressing + the file compressed from empty file. (Kang Xiao via tomwhite) + + HADOOP-6991. Fix SequenceFile::Reader to honor file lengths and call + openFile (cdouglas via omalley) + + HADOOP-7011. Fix KerberosName.main() to not throw an NPE. + (Aaron T. Myers via tomwhite) + + HADOOP-6975. Integer overflow in S3InputStream for blocks > 2GB. + (Patrick Kling via tomwhite) + + HADOOP-6758. MapFile.fix does not allow index interval definition. + (Gianmarco De Francisci Morales via tomwhite) + + HADOOP-6926. SocketInputStream incorrectly implements read(). + (Todd Lipcon via tomwhite) + + HADOOP-6899 RawLocalFileSystem#setWorkingDir() does not work for relative names + (Sanjay Radia) + + HADOOP-6496. HttpServer sends wrong content-type for CSS files + (and others). (Todd Lipcon via tomwhite) + + HADOOP-7057. IOUtils.readFully and IOUtils.skipFully have typo in + exception creation's message. (cos) + + HADOOP-7038. saveVersion script includes an additional \r while running + whoami under windows. (Wang Xu via cos) + + HADOOP-7082. Configuration.writeXML should not hold lock while outputting + (todd) + + HADOOP-7070. JAAS configuration should delegate unknown application names + to pre-existing configuration. (todd) + + HADOOP-7087. SequenceFile.createWriter ignores FileSystem parameter (todd) + + HADOOP-7091. reloginFromKeytab() should happen even if TGT can't be found. + (Kan Zhang via jghoman) + + HADOOP-7100. Fix build to not refer to contrib/ec2 removed by HADOOP-6811 + (todd) + + HADOOP-7097. JAVA_LIBRARY_PATH missing base directory. (Noah Watkins via + todd) + + HADOOP-7093. Servlets should default to text/plain (todd) + + HADOOP-7089. Fix link resolution logic in hadoop-config.sh. (eli) + + HADOOP-7046. Fix Findbugs warning in Configuration. (Po Cheung via shv) + + HADOOP-7118. Fix NPE in Configuration.writeXml (todd) + + HADOOP-7122. Fix thread leak when shell commands time out. (todd) + + HADOOP-7126. Fix file permission setting for RawLocalFileSystem on Windows. + (Po Cheung via shv) + + HADOOP-6642. 
Fix javac, javadoc, findbugs warnings related to security work. + (Chris Douglas, Po Cheung via shv) + + HADOOP-7140. IPC Reader threads do not stop when server stops (todd) + + HADOOP-7094. hadoop.css got lost during project split (cos) + + HADOOP-7145. Configuration.getLocalPath should trim whitespace from + the provided directories. (todd) + + HADOOP-7156. Workaround for unsafe implementations of getpwuid_r (todd) + + HADOOP-6898. FileSystem.copyToLocal creates files with 777 permissions. + (Aaron T. Myers via tomwhite) + + HADOOP-7229. Do not default to an absolute path for kinit in Kerberos + auto-renewal thread. (Aaron T. Myers via todd) + + HADOOP-7172. SecureIO should not check owner on non-secure + clusters that have no native support. (todd via eli) + + HADOOP-7184. Remove deprecated config local.cache.size from + core-default.xml (todd) + + HADOOP-7245. FsConfig should use constants in CommonConfigurationKeys. + (tomwhite via eli) + + HADOOP-7068. Ivy resolve force mode should be turned off by default. + (Luke Lu via tomwhite) + + HADOOP-7296. The FsPermission(FsPermission) constructor does not use the + sticky bit. (Siddharth Seth via tomwhite) + + HADOOP-7300. Configuration methods that return collections are inconsistent + about mutability. (todd) + + HADOOP-7305. Eclipse project classpath should include tools.jar from JDK. + (Niels Basjes via todd) + + HADOOP-7318. MD5Hash factory should reset the digester it returns. + (todd via eli) + + HADOOP-7287. Configuration deprecation mechanism doesn't work properly for + GenericOptionsParser and Tools. (Aaron T. Myers via todd) + + HADOOP-7146. RPC server leaks file descriptors (todd) + + HADOOP-7276. Hadoop native builds fail on ARM due to -m32 (Trevor Robinson + via eli) + + HADOOP-7121. Exceptions while serializing IPC call responses are not + handled well. (todd) + + HADOOP-7351 Regression: HttpServer#getWebAppsPath used to be protected + so subclasses could supply alternate webapps path but it was made private + by HADOOP-6461 (Stack) + + HADOOP-7349. HADOOP-7121 accidentally disabled some tests in TestIPC. + (todd) + + HADOOP-7390. VersionInfo not generated properly in git after unsplit. (todd + via atm) + + HADOOP-7568. SequenceFile should not print into stdout. + (Plamen Jeliazkov via shv) + + HADOOP-7663. Fix TestHDFSTrash failure. (Mayank Bansal via shv) + + HADOOP-7457. Remove out-of-date Chinese language documentation. + (Jakob Homan via eli) + + HADOOP-7783. Add more symlink tests that cover intermediate links. (eli) + +Release 0.21.1 - Unreleased + + IMPROVEMENTS + + HADOOP-6934. Test for ByteWritable comparator. + (Johannes Zillmann via Eli Collins) + + HADOOP-6786. test-patch needs to verify Herriot integrity (cos) + + HADOOP-7177. CodecPool should report which compressor it is using. + (Allen Wittenauer via eli) + + BUG FIXES + + HADOOP-6925. BZip2Codec incorrectly implements read(). + (Todd Lipcon via Eli Collins) + + HADOOP-6833. IPC leaks call parameters when exceptions thrown. + (Todd Lipcon via Eli Collins) + + HADOOP-6971. Clover build doesn't generate per-test coverage (cos) + + HADOOP-6993. Broken link on cluster setup page of docs. (eli) + + HADOOP-6944. [Herriot] Implement a functionality for getting proxy users + definitions like groups and hosts. (Vinay Thota via cos) + + HADOOP-6954. Sources JARs are not correctly published to the Maven + repository. (tomwhite) + + HADOOP-7052. misspelling of threshold in conf/log4j.properties. + (Jingguo Yao via eli) + + HADOOP-7053. 
wrong FSNamesystem Audit logging setting in + conf/log4j.properties. (Jingguo Yao via eli) + + HADOOP-7120. Fix a syntax error in test-patch.sh. (szetszwo) + + HADOOP-7162. Rmove a duplicated call FileSystem.listStatus(..) in FsShell. + (Alexey Diomin via szetszwo) + + HADOOP-7117. Remove fs.checkpoint.* from core-default.xml and replace + fs.checkpoint.* with dfs.namenode.checkpoint.* in documentations. + (Harsh J Chouraria via szetszwo) + + HADOOP-7193. Correct the "fs -touchz" command help message. + (Uma Maheswara Rao G via szetszwo) + + HADOOP-7174. Null is displayed in the "fs -copyToLocal" command. + (Uma Maheswara Rao G via szetszwo) + + HADOOP-7194. Fix resource leak in IOUtils.copyBytes(..). + (Devaraj K via szetszwo) + + HADOOP-7183. WritableComparator.get should not cache comparator objects. + (tomwhite via eli) + +Release 0.21.0 - 2010-08-13 + + INCOMPATIBLE CHANGES + + HADOOP-4895. Remove deprecated methods DFSClient.getHints(..) and + DFSClient.isDirectory(..). (szetszwo) + + HADOOP-4941. Remove deprecated FileSystem methods: getBlockSize(Path f), + getLength(Path f) and getReplication(Path src). (szetszwo) + + HADOOP-4648. Remove obsolete, deprecated InMemoryFileSystem and + ChecksumDistributedFileSystem. (cdouglas via szetszwo) + + HADOOP-4940. Remove a deprecated method FileSystem.delete(Path f). (Enis + Soztutar via szetszwo) + + HADOOP-4010. Change semantics for LineRecordReader to read an additional + line per split- rather than moving back one character in the stream- to + work with splittable compression codecs. (Abdul Qadeer via cdouglas) + + HADOOP-5094. Show hostname and separate live/dead datanodes in DFSAdmin + report. (Jakob Homan via szetszwo) + + HADOOP-4942. Remove deprecated FileSystem methods getName() and + getNamed(String name, Configuration conf). (Jakob Homan via szetszwo) + + HADOOP-5486. Removes the CLASSPATH string from the command line and instead + exports it in the environment. (Amareshwari Sriramadasu via ddas) + + HADOOP-2827. Remove deprecated NetUtils::getServerAddress. (cdouglas) + + HADOOP-5681. Change examples RandomWriter and RandomTextWriter to + use new mapreduce API. (Amareshwari Sriramadasu via sharad) + + HADOOP-5680. Change org.apache.hadoop.examples.SleepJob to use new + mapreduce api. (Amareshwari Sriramadasu via sharad) + + HADOOP-5699. Change org.apache.hadoop.examples.PiEstimator to use + new mapreduce api. (Amareshwari Sriramadasu via sharad) + + HADOOP-5720. Introduces new task types - JOB_SETUP, JOB_CLEANUP + and TASK_CLEANUP. Removes the isMap methods from TaskID/TaskAttemptID + classes. (ddas) + + HADOOP-5668. Change TotalOrderPartitioner to use new API. (Amareshwari + Sriramadasu via cdouglas) + + HADOOP-5738. Split "waiting_tasks" JobTracker metric into waiting maps and + waiting reduces. (Sreekanth Ramakrishnan via cdouglas) + + HADOOP-5679. Resolve findbugs warnings in core/streaming/pipes/examples. + (Jothi Padmanabhan via sharad) + + HADOOP-4359. Support for data access authorization checking on Datanodes. + (Kan Zhang via rangadi) + + HADOOP-5690. Change org.apache.hadoop.examples.DBCountPageView to use + new mapreduce api. (Amareshwari Sriramadasu via sharad) + + HADOOP-5694. Change org.apache.hadoop.examples.dancing to use new + mapreduce api. (Amareshwari Sriramadasu via sharad) + + HADOOP-5696. Change org.apache.hadoop.examples.Sort to use new + mapreduce api. (Amareshwari Sriramadasu via sharad) + + HADOOP-5698. Change org.apache.hadoop.examples.MultiFileWordCount to + use new mapreduce api. 
(Amareshwari Sriramadasu via sharad) + + HADOOP-5913. Provide ability to an administrator to stop and start + job queues. (Rahul Kumar Singh and Hemanth Yamijala via yhemanth) + + MAPREDUCE-711. Removed Distributed Cache from Common, to move it + under Map/Reduce. (Vinod Kumar Vavilapalli via yhemanth) + + HADOOP-6201. Change FileSystem::listStatus contract to throw + FileNotFoundException if the directory does not exist, rather than letting + this be implementation-specific. (Jakob Homan via cdouglas) + + HADOOP-6230. Moved process tree and memory calculator related classes + from Common to Map/Reduce. (Vinod Kumar Vavilapalli via yhemanth) + + HADOOP-6203. FsShell rm/rmr error message indicates exceeding Trash quota + and suggests using -skpTrash, when moving to trash fails. + (Boris Shkolnik via suresh) + + HADOOP-6303. Eclipse .classpath template has outdated jar files and is + missing some new ones. (cos) + + HADOOP-6396. Fix uninformative exception message when unable to parse + umask. (jghoman) + + HADOOP-6299. Reimplement the UserGroupInformation to use the OS + specific and Kerberos JAAS login. (omalley) + + HADOOP-6686. Remove redundant exception class name from the exception + message for the exceptions thrown at RPC client. (suresh) + + HADOOP-6701. Fix incorrect exit codes returned from chmod, chown and chgrp + commands from FsShell. (Ravi Phulari via suresh) + + NEW FEATURES + + HADOOP-6332. Large-scale Automated Test Framework. (sharad, Sreekanth + Ramakrishnan, at all via cos) + + HADOOP-4268. Change fsck to use ClientProtocol methods so that the + corresponding permission requirement for running the ClientProtocol + methods will be enforced. (szetszwo) + + HADOOP-3953. Implement sticky bit for directories in HDFS. (Jakob Homan + via szetszwo) + + HADOOP-4368. Implement df in FsShell to show the status of a FileSystem. + (Craig Macdonald via szetszwo) + + HADOOP-3741. Add a web ui to the SecondaryNameNode for showing its status. + (szetszwo) + + HADOOP-5018. Add pipelined writers to Chukwa. (Ari Rabkin via cdouglas) + + HADOOP-5052. Add an example computing exact digits of pi using the + Bailey-Borwein-Plouffe algorithm. (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-4927. Adds a generic wrapper around outputformat to allow creation of + output on demand (Jothi Padmanabhan via ddas) + + HADOOP-5144. Add a new DFSAdmin command for changing the setting of restore + failed storage replicas in namenode. (Boris Shkolnik via szetszwo) + + HADOOP-5258. Add a new DFSAdmin command to print a tree of the rack and + datanode topology as seen by the namenode. (Jakob Homan via szetszwo) + + HADOOP-4756. A command line tool to access JMX properties on NameNode + and DataNode. (Boris Shkolnik via rangadi) + + HADOOP-4539. Introduce backup node and checkpoint node. (shv) + + HADOOP-5363. Add support for proxying connections to multiple clusters with + different versions to hdfsproxy. (Zhiyong Zhang via cdouglas) + + HADOOP-5528. Add a configurable hash partitioner operating on ranges of + BinaryComparable keys. (Klaas Bosteels via shv) + + HADOOP-5257. HDFS servers may start and stop external components through + a plugin interface. (Carlos Valiente via dhruba) + + HADOOP-5450. Add application-specific data types to streaming's typed bytes + interface. (Klaas Bosteels via omalley) + + HADOOP-5518. Add contrib/mrunit, a MapReduce unit test framework. + (Aaron Kimball via cutting) + + HADOOP-5469. Add /metrics servlet to daemons, providing metrics + over HTTP as either text or JSON. 
(Philip Zeyliger via cutting) + + HADOOP-5467. Introduce offline fsimage image viewer. (Jakob Homan via shv) + + HADOOP-5752. Add a new hdfs image processor, Delimited, to oiv. (Jakob + Homan via szetszwo) + + HADOOP-5266. Adds the capability to do mark/reset of the reduce values + iterator in the Context object API. (Jothi Padmanabhan via ddas) + + HADOOP-5745. Allow setting the default value of maxRunningJobs for all + pools. (dhruba via matei) + + HADOOP-5643. Adds a way to decommission TaskTrackers while the JobTracker + is running. (Amar Kamat via ddas) + + HADOOP-4829. Allow FileSystem shutdown hook to be disabled. + (Todd Lipcon via tomwhite) + + HADOOP-5815. Sqoop: A database import tool for Hadoop. + (Aaron Kimball via tomwhite) + + HADOOP-4861. Add disk usage with human-readable size (-duh). + (Todd Lipcon via tomwhite) + + HADOOP-5844. Use mysqldump when connecting to local mysql instance in Sqoop. + (Aaron Kimball via tomwhite) + + HADOOP-5976. Add a new command, classpath, to the hadoop script. (Owen + O'Malley and Gary Murry via szetszwo) + + HADOOP-6120. Add support for Avro specific and reflect data. + (sharad via cutting) + + HADOOP-6226. Moves BoundedByteArrayOutputStream from the tfile package to + the io package and makes it available to other users (MAPREDUCE-318). + (Jothi Padmanabhan via ddas) + + HADOOP-6105. Adds support for automatically handling deprecation of + configuration keys. (V.V.Chaitanya Krishna via yhemanth) + + HADOOP-6235. Adds new method to FileSystem for clients to get server + defaults. (Kan Zhang via suresh) + + HADOOP-6234. Add new option dfs.umaskmode to set umask in configuration + to use octal or symbolic instead of decimal. (Jakob Homan via suresh) + + HADOOP-5073. Add annotation mechanism for interface classification. + (Jakob Homan via suresh) + + HADOOP-4012. Provide splitting support for bzip2 compressed files. (Abdul + Qadeer via cdouglas) + + HADOOP-6246. Add backward compatibility support to use deprecated decimal + umask from old configuration. (Jakob Homan via suresh) + + HADOOP-4952. Add new improved file system interface FileContext for the + application writer (Sanjay Radia via suresh) + + HADOOP-6170. Add facility to tunnel Avro RPCs through Hadoop RPCs. + This permits one to take advantage of both Avro's RPC versioning + features and Hadoop's proven RPC scalability. (cutting) + + HADOOP-6267. Permit building contrib modules located in external + source trees. (Todd Lipcon via cutting) + + HADOOP-6240. Add new FileContext rename operation that posix compliant + that allows overwriting existing destination. (suresh) + + HADOOP-6204. Implementing aspects development and fault injeciton + framework for Hadoop (cos) + + HADOOP-6313. Implement Syncable interface in FSDataOutputStream to expose + flush APIs to application users. (Hairong Kuang via suresh) + + HADOOP-6284. Add a new parameter, HADOOP_JAVA_PLATFORM_OPTS, to + hadoop-config.sh so that it allows setting java command options for + JAVA_PLATFORM. (Koji Noguchi via szetszwo) + + HADOOP-6337. Updates FilterInitializer class to be more visible, + and the init of the class is made to take a Configuration argument. + (Jakob Homan via ddas) + + Hadoop-6223. Add new file system interface AbstractFileSystem with + implementation of some file systems that delegate to old FileSystem. + (Sanjay Radia via suresh) + + HADOOP-6433. Introduce asychronous deletion of files via a pool of + threads. This can be used to delete files in the Distributed + Cache. 
(Zheng Shao via dhruba) + + HADOOP-6415. Adds a common token interface for both job token and + delegation token. (Kan Zhang via ddas) + + HADOOP-6408. Add a /conf servlet to dump running configuration. + (Todd Lipcon via tomwhite) + + HADOOP-6520. Adds APIs to read/write Token and secret keys. Also + adds the automatic loading of tokens into UserGroupInformation + upon login. The tokens are read from a file specified in the + environment variable. (ddas) + + HADOOP-6419. Adds SASL based authentication to RPC. + (Kan Zhang via ddas) + + HADOOP-6510. Adds a way for superusers to impersonate other users + in a secure environment. (Jitendra Nath Pandey via ddas) + + HADOOP-6421. Adds Symbolic links to FileContext, AbstractFileSystem. + It also adds a limited implementation for the local file system + (RawLocalFs) that allows local symlinks. (Eli Collins via Sanjay Radia) + + HADOOP-6577. Add hidden configuration option "ipc.server.max.response.size" + to change the default 1 MB, the maximum size when large IPC handler + response buffer is reset. (suresh) + + HADOOP-6568. Adds authorization for the default servlets. + (Vinod Kumar Vavilapalli via ddas) + + HADOOP-6586. Log authentication and authorization failures and successes + for RPC (boryas) + + HADOOP-6580. UGI should contain authentication method. (jnp via boryas) + + HADOOP-6657. Add a capitalization method to StringUtils for MAPREDUCE-1545. + (Luke Lu via Steve Loughran) + + HADOOP-6692. Add FileContext#listStatus that returns an iterator. + (hairong) + + HADOOP-6869. Functionality to create file or folder on a remote daemon + side (Vinay Thota via cos) + + IMPROVEMENTS + + HADOOP-6798. Align Ivy version for all Hadoop subprojects. (cos) + + HADOOP-6777. Implement a functionality for suspend and resume a process. + (Vinay Thota via cos) + + HADOOP-6772. Utilities for system tests specific. (Vinay Thota via cos) + + HADOOP-6771. Herriot's artifact id for Maven deployment should be set to + hadoop-core-instrumented (cos) + + HADOOP-6752. Remote cluster control functionality needs JavaDocs + improvement (Balaji Rajagopalan via cos). + + HADOOP-4565. Added CombineFileInputFormat to use data locality information + to create splits. (dhruba via zshao) + + HADOOP-4936. Improvements to TestSafeMode. (shv) + + HADOOP-4985. Remove unnecessary "throw IOException" declarations in + FSDirectory related methods. (szetszwo) + + HADOOP-5017. Change NameNode.namesystem declaration to private. (szetszwo) + + HADOOP-4794. Add branch information from the source version control into + the version information that is compiled into Hadoop. (cdouglas via + omalley) + + HADOOP-5070. Increment copyright year to 2009, remove assertions of ASF + copyright to licensed files. (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-5037. Deprecate static FSNamesystem.getFSNamesystem(). (szetszwo) + + HADOOP-5088. Include releaseaudit target as part of developer test-patch + target. (Giridharan Kesavan via nigel) + + HADOOP-2721. Uses setsid when creating new tasks so that subprocesses of + this process will be within this new session (and this process will be + the process leader for all the subprocesses). Killing the process leader, + or the main Java task in Hadoop's case, kills the entire subtree of + processes. (Ravi Gummadi via ddas) + + HADOOP-5097. Remove static variable JspHelper.fsn, a static reference to + a non-singleton FSNamesystem object. (szetszwo) + + HADOOP-3327. Improves handling of READ_TIMEOUT during map output copying. 
+ (Amareshwari Sriramadasu via ddas) + + HADOOP-5124. Choose datanodes randomly instead of starting from the first + datanode for providing fairness. (hairong via szetszwo) + + HADOOP-4930. Implement a Linux native executable that can be used to + launch tasks as users. (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-5122. Fix format of fs.default.name value in libhdfs test conf. + (Craig Macdonald via tomwhite) + + HADOOP-5038. Direct daemon trace to debug log instead of stdout. (Jerome + Boulon via cdouglas) + + HADOOP-5101. Improve packaging by adding 'all-jars' target building core, + tools, and example jars. Let findbugs depend on this rather than the 'tar' + target. (Giridharan Kesavan via cdouglas) + + HADOOP-4868. Splits the hadoop script into three parts - bin/hadoop, + bin/mapred and bin/hdfs. (Sharad Agarwal via ddas) + + HADOOP-1722. Adds support for TypedBytes and RawBytes in Streaming. + (Klaas Bosteels via ddas) + + HADOOP-4220. Changes the JobTracker restart tests so that they take much + less time. (Amar Kamat via ddas) + + HADOOP-4885. Try to restore failed name-node storage directories at + checkpoint time. (Boris Shkolnik via shv) + + HADOOP-5209. Update year to 2009 for javadoc. (szetszwo) + + HADOOP-5279. Remove unnecessary targets from test-patch.sh. + (Giridharan Kesavan via nigel) + + HADOOP-5120. Remove the use of FSNamesystem.getFSNamesystem() from + UpgradeManagerNamenode and UpgradeObjectNamenode. (szetszwo) + + HADOOP-5222. Add offset to datanode clienttrace. (Lei Xu via cdouglas) + + HADOOP-5240. Skip re-building javadoc when it is already + up-to-date. (Aaron Kimball via cutting) + + HADOOP-5042. Add a cleanup stage to log rollover in Chukwa appender. + (Jerome Boulon via cdouglas) + + HADOOP-5264. Removes redundant configuration object from the TaskTracker. + (Sharad Agarwal via ddas) + + HADOOP-5232. Enable patch testing to occur on more than one host. + (Giri Kesavan via nigel) + + HADOOP-4546. Fix DF reporting for AIX. (Bill Habermaas via cdouglas) + + HADOOP-5023. Add Tomcat support to HdfsProxy. (Zhiyong Zhang via cdouglas) + + HADOOP-5317. Provide documentation for LazyOutput Feature. + (Jothi Padmanabhan via johan) + + HADOOP-5455. Document rpc metrics context to the extent dfs, mapred, and + jvm contexts are documented. (Philip Zeyliger via cdouglas) + + HADOOP-5358. Provide scripting functionality to the synthetic load + generator. (Jakob Homan via hairong) + + HADOOP-5442. Paginate jobhistory display and added some search + capabilities. (Amar Kamat via acmurthy) + + HADOOP-4842. Streaming now allows specifiying a command for the combiner. + (Amareshwari Sriramadasu via ddas) + + HADOOP-5196. avoiding unnecessary byte[] allocation in + SequenceFile.CompressedBytes and SequenceFile.UncompressedBytes. + (hong tang via mahadev) + + HADOOP-4655. New method FileSystem.newInstance() that always returns + a newly allocated FileSystem object. (dhruba) + + HADOOP-4788. Set Fair scheduler to assign both a map and a reduce on each + heartbeat by default. (matei) + + HADOOP-5491. In contrib/index, better control memory usage. + (Ning Li via cutting) + + HADOOP-5423. Include option of preserving file metadata in + SequenceFile::sort. (Michael Tamm via cdouglas) + + HADOOP-5331. Add support for KFS appends. (Sriram Rao via cdouglas) + + HADOOP-4365. Make Configuration::getProps protected in support of + meaningful subclassing. (Steve Loughran via cdouglas) + + HADOOP-2413. Remove the static variable FSNamesystem.fsNamesystemObject. 
+ (Konstantin Shvachko via szetszwo) + + HADOOP-4584. Improve datanode block reports and associated file system + scan to avoid interefering with normal datanode operations. + (Suresh Srinivas via rangadi) + + HADOOP-5502. Documentation for backup and checkpoint nodes. + (Jakob Homan via shv) + + HADOOP-5485. Mask actions in the fair scheduler's servlet UI based on + value of webinterface.private.actions. + (Vinod Kumar Vavilapalli via yhemanth) + + HADOOP-5581. HDFS should throw FileNotFoundException when while opening + a file that does not exist. (Brian Bockelman via rangadi) + + HADOOP-5509. PendingReplicationBlocks does not start monitor in the + constructor. (shv) + + HADOOP-5494. Modify sorted map output merger to lazily read values, + rather than buffering at least one record for each segment. (Devaraj Das + via cdouglas) + + HADOOP-5396. Provide ability to refresh queue ACLs in the JobTracker + without having to restart the daemon. + (Sreekanth Ramakrishnan and Vinod Kumar Vavilapalli via yhemanth) + + HADOOP-4490. Provide ability to run tasks as job owners. + (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-5697. Change org.apache.hadoop.examples.Grep to use new + mapreduce api. (Amareshwari Sriramadasu via sharad) + + HADOOP-5625. Add operation duration to clienttrace. (Lei Xu via cdouglas) + + HADOOP-5705. Improve TotalOrderPartitioner efficiency by updating the trie + construction. (Dick King via cdouglas) + + HADOOP-5589. Eliminate source limit of 64 for map-side joins imposed by + TupleWritable encoding. (Jingkei Ly via cdouglas) + + HADOOP-5734. Correct block placement policy description in HDFS + Design document. (Konstantin Boudnik via shv) + + HADOOP-5657. Validate data in TestReduceFetch to improve merge test + coverage. (cdouglas) + + HADOOP-5613. Change S3Exception to checked exception. + (Andrew Hitchcock via tomwhite) + + HADOOP-5717. Create public enum class for the Framework counters in + org.apache.hadoop.mapreduce. (Amareshwari Sriramadasu via sharad) + + HADOOP-5217. Split AllTestDriver for core, hdfs and mapred. (sharad) + + HADOOP-5364. Add certificate expiration warning to HsftpFileSystem and HDFS + proxy. (Zhiyong Zhang via cdouglas) + + HADOOP-5733. Add map/reduce slot capacity and blacklisted capacity to + JobTracker metrics. (Sreekanth Ramakrishnan via cdouglas) + + HADOOP-5596. Add EnumSetWritable. (He Yongqiang via szetszwo) + + HADOOP-5727. Simplify hashcode for ID types. (Shevek via cdouglas) + + HADOOP-5500. In DBOutputFormat, where field names are absent permit the + number of fields to be sufficient to construct the select query. (Enis + Soztutar via cdouglas) + + HADOOP-5081. Split TestCLI into HDFS, Mapred and Core tests. (sharad) + + HADOOP-5015. Separate block management code from FSNamesystem. (Suresh + Srinivas via szetszwo) + + HADOOP-5080. Add new test cases to TestMRCLI and TestHDFSCLI + (V.Karthikeyan via nigel) + + HADOOP-5135. Splits the tests into different directories based on the + package. Four new test targets have been defined - run-test-core, + run-test-mapred, run-test-hdfs and run-test-hdfs-with-mr. + (Sharad Agarwal via ddas) + + HADOOP-5771. Implements unit tests for LinuxTaskController. + (Sreekanth Ramakrishnan and Vinod Kumar Vavilapalli via yhemanth) + + HADOOP-5419. Provide a facility to query the Queue ACLs for the + current user. + (Rahul Kumar Singh via yhemanth) + + HADOOP-5780. Improve per block message prited by "-metaSave" in HDFS. + (Raghu Angadi) + + HADOOP-5823. 
Added a new class DeprecatedUTF8 to help with removing + UTF8 related javac warnings. These warnings are removed in + FSEditLog.java as a use case. (Raghu Angadi) + + HADOOP-5824. Deprecate DataTransferProtocol.OP_READ_METADATA and remove + the corresponding unused codes. (Kan Zhang via szetszwo) + + HADOOP-5721. Factor out EditLogFileInputStream and EditLogFileOutputStream + into independent classes. (Luca Telloli & Flavio Junqueira via shv) + + HADOOP-5838. Fix a few javac warnings in HDFS. (Raghu Angadi) + + HADOOP-5854. Fix a few "Inconsistent Synchronization" warnings in HDFS. + (Raghu Angadi) + + HADOOP-5369. Small tweaks to reduce MapFile index size. (Ben Maurer + via sharad) + + HADOOP-5858. Eliminate UTF8 and fix warnings in test/hdfs-with-mr package. + (shv) + + HADOOP-5866. Move DeprecatedUTF8 from o.a.h.io to o.a.h.hdfs since it may + not be used outside hdfs. (Raghu Angadi) + + HADOOP-5857. Move normal java methods from hdfs .jsp files to .java files. + (szetszwo) + + HADOOP-5873. Remove deprecated methods randomDataNode() and + getDatanodeByIndex(..) in FSNamesystem. (szetszwo) + + HADOOP-5572. Improves the progress reporting for the sort phase for both + maps and reduces. (Ravi Gummadi via ddas) + + HADOOP-5839. Fix EC2 scripts to allow remote job submission. + (Joydeep Sen Sarma via tomwhite) + + HADOOP-5877. Fix javac warnings in TestHDFSServerPorts, TestCheckpoint, + TestNameEditsConfig, TestStartup and TestStorageRestore. + (Jakob Homan via shv) + + HADOOP-5438. Provide a single FileSystem method to create or + open-for-append to a file. (He Yongqiang via dhruba) + + HADOOP-5472. Change DistCp to support globbing of input paths. (Dhruba + Borthakur and Rodrigo Schmidt via szetszwo) + + HADOOP-5175. Don't unpack libjars on classpath. (Todd Lipcon via tomwhite) + + HADOOP-5620. Add an option to DistCp for preserving modification and access + times. (Rodrigo Schmidt via szetszwo) + + HADOOP-5664. Change map serialization so a lock is obtained only where + contention is possible, rather than for each write. (cdouglas) + + HADOOP-5896. Remove the dependency of GenericOptionsParser on + Option.withArgPattern. (Giridharan Kesavan and Sharad Agarwal via + sharad) + + HADOOP-5784. Makes the number of heartbeats that should arrive a second + at the JobTracker configurable. (Amareshwari Sriramadasu via ddas) + + HADOOP-5955. Changes TestFileOuputFormat so that is uses LOCAL_MR + instead of CLUSTER_MR. (Jothi Padmanabhan via das) + + HADOOP-5948. Changes TestJavaSerialization to use LocalJobRunner + instead of MiniMR/DFS cluster. (Jothi Padmanabhan via das) + + HADOOP-2838. Add mapred.child.env to pass environment variables to + tasktracker's child processes. (Amar Kamat via sharad) + + HADOOP-5961. DataNode process understand generic hadoop command line + options (like -Ddfs.property=value). (Raghu Angadi) + + HADOOP-5938. Change org.apache.hadoop.mapred.jobcontrol to use new + api. (Amareshwari Sriramadasu via sharad) + + HADOOP-2141. Improves the speculative execution heuristic. The heuristic + is currently based on the progress-rates of tasks and the expected time + to complete. Also, statistics about trackers are collected, and speculative + tasks are not given to the ones deduced to be slow. + (Andy Konwinski and ddas) + + HADOOP-5952. Change "-1 tests included" wording in test-patch.sh. + (Gary Murry via szetszwo) + + HADOOP-6106. Provides an option in ShellCommandExecutor to timeout + commands that do not complete within a certain amount of time. 
+ (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-5925. EC2 scripts should exit on error. (tomwhite) + + HADOOP-6109. Change Text to grow its internal buffer exponentially, rather + than the max of the current length and the proposed length to improve + performance reading large values. (thushara wijeratna via cdouglas) + + HADOOP-2366. Support trimmed strings in Configuration. (Michele Catasta + via szetszwo) + + HADOOP-6099. The RPC module can be configured to not send period pings. + The default behaviour of sending periodic pings remain unchanged. (dhruba) + + HADOOP-6142. Update documentation and use of harchives for relative paths + added in MAPREDUCE-739. (Mahadev Konar via cdouglas) + + HADOOP-6148. Implement a fast, pure Java CRC32 calculator which outperforms + java.util.zip.CRC32. (Todd Lipcon and Scott Carey via szetszwo) + + HADOOP-6146. Upgrade to JetS3t version 0.7.1. (tomwhite) + + HADOOP-6161. Add get/setEnum methods to Configuration. (cdouglas) + + HADOOP-6160. Fix releaseaudit target to run on specific directories. + (gkesavan) + + HADOOP-6169. Removing deprecated method calls in TFile. (hong tang via + mahadev) + + HADOOP-6176. Add a couple package private methods to AccessTokenHandler + for testing. (Kan Zhang via szetszwo) + + HADOOP-6182. Fix ReleaseAudit warnings (Giridharan Kesavan and Lee Tucker + via gkesavan) + + HADOOP-6173. Change src/native/packageNativeHadoop.sh to package all + native library files. (Hong Tang via szetszwo) + + HADOOP-6184. Provide an API to dump Configuration in a JSON format. + (V.V.Chaitanya Krishna via yhemanth) + + HADOOP-6224. Add a method to WritableUtils performing a bounded read of an + encoded String. (Jothi Padmanabhan via cdouglas) + + HADOOP-6133. Add a caching layer to Configuration::getClassByName to + alleviate a performance regression introduced in a compatibility layer. + (Todd Lipcon via cdouglas) + + HADOOP-6252. Provide a method to determine if a deprecated key is set in + config file. (Jakob Homan via suresh) + + HADOOP-5879. Read compression level and strategy from Configuration for + gzip compression. (He Yongqiang via cdouglas) + + HADOOP-6216. Support comments in host files. (Ravi Phulari and Dmytro + Molkov via szetszwo) + + HADOOP-6217. Update documentation for project split. (Corinne Chandel via + omalley) + + HADOOP-6268. Add ivy jar to .gitignore. (Todd Lipcon via cdouglas) + + HADOOP-6270. Support deleteOnExit in FileContext. (Suresh Srinivas via + szetszwo) + + HADOOP-6233. Rename configuration keys towards API standardization and + backward compatibility. (Jithendra Pandey via suresh) + + HADOOP-6260. Add additional unit tests for FileContext util methods. + (Gary Murry via suresh). + + HADOOP-6309. Change build.xml to run tests with java asserts. (Eli + Collins via szetszwo) + + HADOOP-6326. Hundson runs should check for AspectJ warnings and report + failure if any is present (cos) + + HADOOP-6329. Add build-fi directory to the ignore lists. (szetszwo) + + HADOOP-5107. Use Maven ant tasks to publish the subproject jars. + (Giridharan Kesavan via omalley) + + HADOOP-6343. Log unexpected throwable object caught in RPC. (Jitendra Nath + Pandey via szetszwo) + + HADOOP-6367. Removes Access Token implementation from common. + (Kan Zhang via ddas) + + HADOOP-6395. Upgrade some libraries to be consistent across common, hdfs, + and mapreduce. (omalley) + + HADOOP-6398. Build is broken after HADOOP-6395 patch has been applied (cos) + + HADOOP-6413. Move TestReflectionUtils to Common. 
(Todd Lipcon via tomwhite) + + HADOOP-6283. Improve the exception messages thrown by + FileUtil$HardLink.getLinkCount(..). (szetszwo) + + HADOOP-6279. Add Runtime::maxMemory to JVM metrics. (Todd Lipcon via + cdouglas) + + HADOOP-6305. Unify build property names to facilitate cross-projects + modifications (cos) + + HADOOP-6312. Remove unnecessary debug logging in Configuration constructor. + (Aaron Kimball via cdouglas) + + HADOOP-6366. Reduce ivy console output to observable level (cos) + + HADOOP-6400. Log errors getting Unix UGI. (Todd Lipcon via tomwhite) + + HADOOP-6346. Add support for specifying unpack pattern regex to + RunJar.unJar. (Todd Lipcon via tomwhite) + + HADOOP-6422. Make RPC backend pluggable, protocol-by-protocol, to + ease evolution towards Avro. (cutting) + + HADOOP-5958. Use JDK 1.6 File APIs in DF.java wherever possible. + (Aaron Kimball via tomwhite) + + HADOOP-6222. Core doesn't have TestCommonCLI facility. (cos) + + HADOOP-6394. Add a helper class to simplify FileContext related tests and + improve code reusability. (Jitendra Nath Pandey via suresh) + + HADOOP-4656. Add a user to groups mapping service. (boryas, acmurthy) + + HADOOP-6435. Make RPC.waitForProxy with timeout public. (Steve Loughran + via tomwhite) + + HADOOP-6472. add tokenCache option to GenericOptionsParser for passing + file with secret keys to a map reduce job. (boryas) + + HADOOP-3205. Read multiple chunks directly from FSInputChecker subclass + into user buffers. (Todd Lipcon via tomwhite) + + HADOOP-6479. TestUTF8 assertions could fail with better text. + (Steve Loughran via tomwhite) + + HADOOP-6155. Deprecate RecordIO anticipating Avro. (Tom White via cdouglas) + + HADOOP-6492. Make some Avro serialization APIs public. + (Aaron Kimball via cutting) + + HADOOP-6497. Add an adapter for Avro's SeekableInput interface, so + that Avro can read FileSystem data. + (Aaron Kimball via cutting) + + HADOOP-6495. Identifier should be serialized after the password is + created in Token constructor (jnp via boryas) + + HADOOP-6518. Makes the UGI honor the env var KRB5CCNAME. + (Owen O'Malley via ddas) + + HADOOP-6531. Enhance FileUtil with an API to delete all contents of a + directory. (Amareshwari Sriramadasu via yhemanth) + + HADOOP-6547. Move DelegationToken into Common, so that it can be used by + MapReduce also. (devaraj via omalley) + + HADOOP-6552. Puts renewTGT=true and useTicketCache=true for the keytab + kerberos options. (ddas) + + HADOOP-6534. Trim whitespace from directory lists initializing + LocalDirAllocator. (Todd Lipcon via cdouglas) + + HADOOP-6559. Makes the RPC client automatically re-login when the SASL + connection setup fails. This is applicable only to keytab based logins. + (Devaraj Das) + + HADOOP-6551. Delegation token renewing and cancelling should provide + meaningful exceptions when there are failures instead of returning + false. (omalley) + + HADOOP-6583. Captures authentication and authorization metrics. (ddas) + + HADOOP-6543. Allows secure clients to talk to unsecure clusters. + (Kan Zhang via ddas) + + HADOOP-6579. Provide a mechanism for encoding/decoding Tokens from + a url-safe string and change the commons-code library to 1.4. (omalley) + + HADOOP-6596. Add a version field to the AbstractDelegationTokenIdentifier's + serialized value. (omalley) + + HADOOP-6573. Support for persistent delegation tokens. + (Jitendra Pandey via shv) + + HADOOP-6594. Provide a fetchdt tool via bin/hdfs. (jhoman via acmurthy) + + HADOOP-6589. 
Provide better error messages when RPC authentication fails. + (Kan Zhang via omalley) + + HADOOP-6599 Split existing RpcMetrics into RpcMetrics & RpcDetailedMetrics. + (Suresh Srinivas via Sanjay Radia) + + HADOOP-6537 Declare more detailed exceptions in FileContext and + AbstractFileSystem (Suresh Srinivas via Sanjay Radia) + + HADOOP-6486. fix common classes to work with Avro 1.3 reflection. + (cutting via tomwhite) + + HADOOP-6591. HarFileSystem can handle paths with the whitespace characters. + (Rodrigo Schmidt via dhruba) + + HADOOP-6407. Have a way to automatically update Eclipse .classpath file + when new libs are added to the classpath through Ivy. (tomwhite) + + HADOOP-3659. Patch to allow hadoop native to compile on Mac OS X. + (Colin Evans and Allen Wittenauer via tomwhite) + + HADOOP-6471. StringBuffer -> StringBuilder - conversion of references + as necessary. (Kay Kay via tomwhite) + + HADOOP-6646. Move HarfileSystem out of Hadoop Common. (mahadev) + + HADOOP-6566. Add methods supporting, enforcing narrower permissions on + local daemon directories. (Arun Murthy and Luke Lu via cdouglas) + + HADOOP-6705. Fix to work with 1.5 version of jiracli + (Giridharan Kesavan) + + HADOOP-6658. Exclude Private elements from generated Javadoc. (tomwhite) + + HADOOP-6635. Install/deploy source jars to Maven repo. + (Patrick Angeles via jghoman) + + HADOOP-6717. Log levels in o.a.h.security.Groups too high + (Todd Lipcon via jghoman) + + HADOOP-6667. RPC.waitForProxy should retry through NoRouteToHostException. + (Todd Lipcon via tomwhite) + + HADOOP-6677. InterfaceAudience.LimitedPrivate should take a string not an + enum. (tomwhite) + + HADOOP-678. Remove FileContext#isFile, isDirectory, and exists. + (Eli Collins via hairong) + + HADOOP-6515. Make maximum number of http threads configurable. + (Scott Chen via zshao) + + HADOOP-6563. Add more symlink tests to cover intermediate symlinks + in paths. (Eli Collins via suresh) + + HADOOP-6585. Add FileStatus#isDirectory and isFile. (Eli Collins via + tomwhite) + + HADOOP-6738. Move cluster_setup.xml from MapReduce to Common. + (Tom White via tomwhite) + + HADOOP-6794. Move configuration and script files post split. (tomwhite) + + HADOOP-6403. Deprecate EC2 bash scripts. (tomwhite) + + HADOOP-6769. Add an API in FileSystem to get FileSystem instances based + on users(ddas via boryas) + + HADOOP-6813. Add a new newInstance method in FileSystem that takes + a "user" as argument (ddas via boryas) + + HADOOP-6668. Apply audience and stability annotations to classes in + common. (tomwhite) + + HADOOP-6821. Document changes to memory monitoring. (Hemanth Yamijala + via tomwhite) + + OPTIMIZATIONS + + HADOOP-5595. NameNode does not need to run a replicator to choose a + random DataNode. (hairong) + + HADOOP-5603. Improve NameNode's block placement performance. (hairong) + + HADOOP-5638. More improvement on block placement performance. (hairong) + + HADOOP-6180. NameNode slowed down when many files with same filename + were moved to Trash. (Boris Shkolnik via hairong) + + HADOOP-6166. Further improve the performance of the pure-Java CRC32 + implementation. (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-6271. Add recursive and non recursive create and mkdir to + FileContext. (Sanjay Radia via suresh) + + HADOOP-6261. Add URI based tests for FileContext. + (Ravi Pulari via suresh). + + HADOOP-6307. Add a new SequenceFile.Reader constructor in order to support + reading on un-closed file. (szetszwo) + + HADOOP-6467. 
Improve the performance on HarFileSystem.listStatus(..). + (mahadev via szetszwo) + + HADOOP-6569. FsShell#cat should avoid calling unecessary getFileStatus + before opening a file to read. (hairong) + + HADOOP-6689. Add directory renaming test to existing FileContext tests. + (Eli Collins via suresh) + + HADOOP-6713. The RPC server Listener thread is a scalability bottleneck. + (Dmytro Molkov via hairong) + + BUG FIXES + + HADOOP-6748. Removes hadoop.cluster.administrators, cluster administrators + acl is passed as parameter in constructor. (amareshwari) + + HADOOP-6828. Herrior uses old way of accessing logs directories (Sreekanth + Ramakrishnan via cos) + + HADOOP-6788. [Herriot] Exception exclusion functionality is not working + correctly. (Vinay Thota via cos) + + HADOOP-6773. Ivy folder contains redundant files (cos) + + HADOOP-5379. CBZip2InputStream to throw IOException on data crc error. + (Rodrigo Schmidt via zshao) + + HADOOP-5326. Fixes CBZip2OutputStream data corruption problem. + (Rodrigo Schmidt via zshao) + + HADOOP-4963. Fixes a logging to do with getting the location of + map output file. (Amareshwari Sriramadasu via ddas) + + HADOOP-2337. Trash should close FileSystem on exit and should not start + emtying thread if disabled. (shv) + + HADOOP-5072. Fix failure in TestCodec because testSequenceFileGzipCodec + won't pass without native gzip codec. (Zheng Shao via dhruba) + + HADOOP-5050. TestDFSShell.testFilePermissions should not assume umask + setting. (Jakob Homan via szetszwo) + + HADOOP-4975. Set classloader for nested mapred.join configs. (Jingkei Ly + via cdouglas) + + HADOOP-5078. Remove invalid AMI kernel in EC2 scripts. (tomwhite) + + HADOOP-5045. FileSystem.isDirectory() should not be deprecated. (Suresh + Srinivas via szetszwo) + + HADOOP-4960. Use datasource time, rather than system time, during metrics + demux. (Eric Yang via cdouglas) + + HADOOP-5032. Export conf dir set in config script. (Eric Yang via cdouglas) + + HADOOP-5176. Fix a typo in TestDFSIO. (Ravi Phulari via szetszwo) + + HADOOP-4859. Distinguish daily rolling output dir by adding a timestamp. + (Jerome Boulon via cdouglas) + + HADOOP-4959. Correct system metric collection from top on Redhat 5.1. (Eric + Yang via cdouglas) + + HADOOP-5039. Fix log rolling regex to process only the relevant + subdirectories. (Jerome Boulon via cdouglas) + + HADOOP-5095. Update Chukwa watchdog to accept config parameter. (Jerome + Boulon via cdouglas) + + HADOOP-5147. Correct reference to agent list in Chukwa bin scripts. (Ari + Rabkin via cdouglas) + + HADOOP-5148. Fix logic disabling watchdog timer in Chukwa daemon scripts. + (Ari Rabkin via cdouglas) + + HADOOP-5100. Append, rather than truncate, when creating log4j metrics in + Chukwa. (Jerome Boulon via cdouglas) + + HADOOP-5204. Fix broken trunk compilation on Hudson by letting + task-controller be an independent target in build.xml. + (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-5212. Fix the path translation problem introduced by HADOOP-4868 + running on cygwin. (Sharad Agarwal via omalley) + + HADOOP-5226. Add license headers to html and jsp files. (szetszwo) + + HADOOP-5172. Disable misbehaving Chukwa unit test until it can be fixed. + (Jerome Boulon via nigel) + + HADOOP-4933. Fixes a ConcurrentModificationException problem that shows up + when the history viewer is accessed concurrently. + (Amar Kamat via ddas) + + HADOOP-5253. Remove duplicate call to cn-docs target. + (Giri Kesavan via nigel) + + HADOOP-5251. 
Fix classpath for contrib unit tests to include clover jar. + (nigel) + + HADOOP-5206. Synchronize "unprotected*" methods of FSDirectory on the root. + (Jakob Homan via shv) + + HADOOP-5292. Fix NPE in KFS::getBlockLocations. (Sriram Rao via lohit) + + HADOOP-5219. Adds a new property io.seqfile.local.dir for use by + SequenceFile, which earlier used mapred.local.dir. (Sharad Agarwal + via ddas) + + HADOOP-5300. Fix ant javadoc-dev target and the typo in the class name + NameNodeActivtyMBean. (szetszwo) + + HADOOP-5218. libhdfs unit test failed because it was unable to + start namenode/datanode. Fixed. (dhruba) + + HADOOP-5273. Add license header to TestJobInProgress.java. (Jakob Homan + via szetszwo) + + HADOOP-5229. Remove duplicate version variables in build files + (Stefan Groschupf via johan) + + HADOOP-5383. Avoid building an unused string in NameNode's + verifyReplication(). (Raghu Angadi) + + HADOOP-5347. Create a job output directory for the bbp examples. (szetszwo) + + HADOOP-5341. Make hadoop-daemon scripts backwards compatible with the + changes in HADOOP-4868. (Sharad Agarwal via yhemanth) + + HADOOP-5456. Fix javadoc links to ClientProtocol#restoreFailedStorage(..). + (Boris Shkolnik via szetszwo) + + HADOOP-5458. Remove leftover Chukwa entries from build, etc. (cdouglas) + + HADOOP-5386. Modify hdfsproxy unit test to start on a random port, + implement clover instrumentation. (Zhiyong Zhang via cdouglas) + + HADOOP-5511. Add Apache License to EditLogBackupOutputStream. (shv) + + HADOOP-5507. Fix JMXGet javadoc warnings. (Boris Shkolnik via szetszwo) + + HADOOP-5191. Accessing HDFS with any ip or hostname should work as long + as it points to the interface NameNode is listening on. (Raghu Angadi) + + HADOOP-5561. Add javadoc.maxmemory parameter to build, preventing OOM + exceptions from javadoc-dev. (Jakob Homan via cdouglas) + + HADOOP-5149. Modify HistoryViewer to ignore unfamiliar files in the log + directory. (Hong Tang via cdouglas) + + HADOOP-5477. Fix rare failure in TestCLI for hosts returning variations of + 'localhost'. (Jakob Homan via cdouglas) + + HADOOP-5194. Disables setsid for tasks run on cygwin. + (Ravi Gummadi via ddas) + + HADOOP-5322. Fix misleading/outdated comments in JobInProgress. + (Amareshwari Sriramadasu via cdouglas) + + HADOOP-5198. Fixes a problem to do with the task PID file being absent and + the JvmManager trying to look for it. (Amareshwari Sriramadasu via ddas) + + HADOOP-5464. DFSClient did not treat write timeout of 0 properly. + (Raghu Angadi) + + HADOOP-4045. Fix processing of IO errors in EditsLog. + (Boris Shkolnik via shv) + + HADOOP-5462. Fixed a double free bug in the task-controller + executable. (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-5652. Fix a bug where in-memory segments are incorrectly retained in + memory. (cdouglas) + + HADOOP-5533. Recovery duration shown on the jobtracker webpage is + inaccurate. (Amar Kamat via sharad) + + HADOOP-5647. Fix TestJobHistory to not depend on /tmp. (Ravi Gummadi + via sharad) + + HADOOP-5661. Fixes some findbugs warnings in o.a.h.mapred* packages and + supresses a bunch of them. (Jothi Padmanabhan via ddas) + + HADOOP-5704. Fix compilation problems in TestFairScheduler and + TestCapacityScheduler. (Chris Douglas via szetszwo) + + HADOOP-5650. Fix safemode messages in the Namenode log. (Suresh Srinivas + via szetszwo) + + HADOOP-5488. 
Removes the pidfile management for the Task JVM from the + framework and instead passes the PID back and forth between the + TaskTracker and the Task processes. (Ravi Gummadi via ddas) + + HADOOP-5658. Fix Eclipse templates. (Philip Zeyliger via shv) + + HADOOP-5709. Remove redundant synchronization added in HADOOP-5661. (Jothi + Padmanabhan via cdouglas) + + HADOOP-5715. Add conf/mapred-queue-acls.xml to the ignore lists. + (szetszwo) + + HADOOP-5592. Fix typo in Streaming doc in reference to GzipCodec. + (Corinne Chandel via tomwhite) + + HADOOP-5656. Counter for S3N Read Bytes does not work. (Ian Nowland + via tomwhite) + + HADOOP-5406. Fix JNI binding for ZlibCompressor::setDictionary. (Lars + Francke via cdouglas) + + HADOOP-3426. Fix/provide handling when DNS lookup fails on the loopback + address. Also cache the result of the lookup. (Steve Loughran via cdouglas) + + HADOOP-5476. Close the underlying InputStream in SequenceFile::Reader when + the constructor throws an exception. (Michael Tamm via cdouglas) + + HADOOP-5675. Do not launch a job if DistCp has no work to do. (Tsz Wo + (Nicholas), SZE via cdouglas) + + HADOOP-5737. Fixes a problem in the way the JobTracker used to talk to + other daemons like the NameNode to get the job's files. Also adds APIs + in the JobTracker to get the FileSystem objects as per the JobTracker's + configuration. (Amar Kamat via ddas) + + HADOOP-5648. Not able to generate gridmix.jar on the already compiled + version of hadoop. (gkesavan) + + HADOOP-5808. Fix import never used javac warnings in hdfs. (szetszwo) + + HADOOP-5203. TT's version build is too restrictive. (Rick Cox via sharad) + + HADOOP-5818. Revert the renaming from FSNamesystem.checkSuperuserPrivilege + to checkAccess by HADOOP-5643. (Amar Kamat via szetszwo) + + HADOOP-5820. Fix findbugs warnings for http related codes in hdfs. + (szetszwo) + + HADOOP-5822. Fix javac warnings in several dfs tests related to unncessary + casts. (Jakob Homan via szetszwo) + + HADOOP-5842. Fix a few javac warnings under packages fs and util. + (Hairong Kuang via szetszwo) + + HADOOP-5845. Build successful despite test failure on test-core target. + (sharad) + + HADOOP-5314. Prevent unnecessary saving of the file system image during + name-node startup. (Jakob Homan via shv) + + HADOOP-5855. Fix javac warnings for DisallowedDatanodeException and + UnsupportedActionException. (szetszwo) + + HADOOP-5582. Fixes a problem in Hadoop Vaidya to do with reading + counters from job history files. (Suhas Gogate via ddas) + + HADOOP-5829. Fix javac warnings found in ReplicationTargetChooser, + FSImage, Checkpointer, SecondaryNameNode and a few other hdfs classes. + (Suresh Srinivas via szetszwo) + + HADOOP-5835. Fix findbugs warnings found in Block, DataNode, NameNode and + a few other hdfs classes. (Suresh Srinivas via szetszwo) + + HADOOP-5853. Undeprecate HttpServer.addInternalServlet method. (Suresh + Srinivas via szetszwo) + + HADOOP-5801. Fixes the problem: If the hosts file is changed across restart + then it should be refreshed upon recovery so that the excluded hosts are + lost and the maps are re-executed. (Amar Kamat via ddas) + + HADOOP-5841. Resolve findbugs warnings in DistributedFileSystem, + DatanodeInfo, BlocksMap, DataNodeDescriptor. (Jakob Homan via szetszwo) + + HADOOP-5878. Fix import and Serializable javac warnings found in hdfs jsp. + (szetszwo) + + HADOOP-5782. Revert a few formatting changes introduced in HADOOP-5015. + (Suresh Srinivas via rangadi) + + HADOOP-5687. 
NameNode throws NPE if fs.default.name is the default value. + (Philip Zeyliger via shv) + + HADOOP-5867. Fix javac warnings found in NNBench and NNBenchWithoutMR. + (Konstantin Boudnik via szetszwo) + + HADOOP-5728. Fixed FSEditLog.printStatistics IndexOutOfBoundsException. + (Wang Xu via johan) + + HADOOP-5847. Fixed failing Streaming unit tests (gkesavan) + + HADOOP-5252. Streaming overrides -inputformat option (Klaas Bosteels + via sharad) + + HADOOP-5710. Counter MAP_INPUT_BYTES missing from new mapreduce api. + (Amareshwari Sriramadasu via sharad) + + HADOOP-5809. Fix job submission, broken by errant directory creation. + (Sreekanth Ramakrishnan and Jothi Padmanabhan via cdouglas) + + HADOOP-5635. Change distributed cache to work with other distributed file + systems. (Andrew Hitchcock via tomwhite) + + HADOOP-5856. Fix "unsafe multithreaded use of DateFormat" findbugs warning + in DataBlockScanner. (Kan Zhang via szetszwo) + + HADOOP-4864. Fixes a problem to do with -libjars with multiple jars when + client and cluster reside on different OSs. (Amareshwari Sriramadasu via + ddas) + + HADOOP-5623. Fixes a problem to do with status messages getting overwritten + in streaming jobs. (Rick Cox and Jothi Padmanabhan via ddas) + + HADOOP-5895. Fixes computation of count of merged bytes for logging. + (Ravi Gummadi via ddas) + + HADOOP-5805. problem using top level s3 buckets as input/output + directories. (Ian Nowland via tomwhite) + + HADOOP-5940. trunk eclipse-plugin build fails while trying to copy + commons-cli jar from the lib dir (Giridharan Kesavan via gkesavan) + + HADOOP-5864. Fix DMI and OBL findbugs in packages hdfs and metrics. + (hairong) + + HADOOP-5935. Fix Hudson's release audit warnings link is broken. + (Giridharan Kesavan via gkesavan) + + HADOOP-5947. Delete empty TestCombineFileInputFormat.java + + HADOOP-5899. Move a log message in FSEditLog to the right place for + avoiding unnecessary log. (Suresh Srinivas via szetszwo) + + HADOOP-5944. Add Apache license header to BlockManager.java. (Suresh + Srinivas via szetszwo) + + HADOOP-5891. SecondaryNamenode is able to converse with the NameNode + even when the default value of dfs.http.address is not overridden. + (Todd Lipcon via dhruba) + + HADOOP-5953. The isDirectory(..) and isFile(..) methods in KosmosFileSystem + should not be deprecated. (szetszwo) + + HADOOP-5954. Fix javac warnings in TestFileCreation, TestSmallBlock, + TestFileStatus, TestDFSShellGenericOptions, TestSeekBug and + TestDFSStartupVersions. (szetszwo) + + HADOOP-5956. Fix ivy dependency in hdfsproxy and capacity-scheduler. + (Giridharan Kesavan via szetszwo) + + HADOOP-5836. Bug in S3N handling of directory markers using an object with + a trailing "/" causes jobs to fail. (Ian Nowland via tomwhite) + + HADOOP-5861. s3n files are not getting split by default. (tomwhite) + + HADOOP-5762. Fix a problem that DistCp does not copy empty directory. + (Rodrigo Schmidt via szetszwo) + + HADOOP-5859. Fix "wait() or sleep() with locks held" findbugs warnings in + DFSClient. (Kan Zhang via szetszwo) + + HADOOP-5457. Fix to continue to run builds even if contrib test fails + (Giridharan Kesavan via gkesavan) + + HADOOP-5963. Remove an unnecessary exception catch in NNBench. (Boris + Shkolnik via szetszwo) + + HADOOP-5989. Fix streaming test failure. (gkesavan) + + HADOOP-5981. Fix a bug in HADOOP-2838 in parsing mapred.child.env. + (Amar Kamat via sharad) + + HADOOP-5420. Fix LinuxTaskController to kill tasks using the process + groups they are launched with. 
+ (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-6031. Remove @author tags from Java source files. (Ravi Phulari + via szetszwo) + + HADOOP-5980. Fix LinuxTaskController so tasks get passed + LD_LIBRARY_PATH and other environment variables. + (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-4041. IsolationRunner does not work as documented. + (Philip Zeyliger via tomwhite) + + HADOOP-6004. Fixes BlockLocation deserialization. (Jakob Homan via + szetszwo) + + HADOOP-6079. Serialize proxySource as DatanodeInfo in DataTransferProtocol. + (szetszwo) + + HADOOP-6096. Fix Eclipse project and classpath files following project + split. (tomwhite) + + HADOOP-6122. The great than operator in test-patch.sh should be "-gt" but + not ">". (szetszwo) + + HADOOP-6114. Fix javadoc documentation for FileStatus.getLen. + (Dmitry Rzhevskiy via dhruba) + + HADOOP-6131. A sysproperty should not be set unless the property + is set on the ant command line in build.xml (hong tang via mahadev) + + HADOOP-6137. Fix project specific test-patch requirements + (Giridharan Kesavan) + + HADOOP-6138. Eliminate the deprecated warnings introduced by H-5438. + (He Yongqiang via szetszwo) + + HADOOP-6132. RPC client create an extra connection because of incorrect + key for connection cache. (Kan Zhang via rangadi) + + HADOOP-6123. Add missing classpaths in hadoop-config.sh. (Sharad Agarwal + via szetszwo) + + HADOOP-6172. Fix jar file names in hadoop-config.sh and include + ${build.src} as a part of the source list in build.xml. (Hong Tang via + szetszwo) + + HADOOP-6124. Fix javac warning detection in test-patch.sh. (Giridharan + Kesavan via szetszwo) + + HADOOP-6177. FSInputChecker.getPos() would return position greater + than the file size. (Hong Tang via hairong) + + HADOOP-6188. TestTrash uses java.io.File api but not hadoop FileSystem api. + (Boris Shkolnik via szetszwo) + + HADOOP-6192. Fix Shell.getUlimitMemoryCommand to not rely on Map-Reduce + specific configs. (acmurthy) + + HADOOP-6103. Clones the classloader as part of Configuration clone. + (Amareshwari Sriramadasu via ddas) + + HADOOP-6152. Fix classpath variables in bin/hadoop-config.sh and some + other scripts. (Aaron Kimball via szetszwo) + + HADOOP-6215. fix GenericOptionParser to deal with -D with '=' in the + value. (Amar Kamat via sharad) + + HADOOP-6227. Fix Configuration to allow final parameters to be set to null + and prevent them from being overridden. + (Amareshwari Sriramadasu via yhemanth) + + HADOOP-6199. Move io.map.skip.index property to core-default from mapred. + (Amareshwari Sriramadasu via cdouglas) + + HADOOP-6229. Attempt to make a directory under an existing file on + LocalFileSystem should throw an Exception. (Boris Shkolnik via tomwhite) + + HADOOP-6243. Fix a NullPointerException in processing deprecated keys. + (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-6009. S3N listStatus incorrectly returns null instead of empty + array when called on empty root. (Ian Nowland via tomwhite) + + HADOOP-6181. Fix .eclipse.templates/.classpath for avro and jets3t jar + files. (Carlos Valiente via szetszwo) + + HADOOP-6196. Fix a bug in SequenceFile.Reader where syncing within the + header would cause the reader to read the sync marker as a record. (Jay + Booth via cdouglas) + + HADOOP-6250. Modify test-patch to delete copied XML files before running + patch build. (Rahul Kumar Singh via yhemanth) + + HADOOP-6257. Two TestFileSystem classes are confusing + hadoop-hdfs-hdfwithmr. (Philip Zeyliger via tomwhite) + + HADOOP-6151. 
Added a input filter to all of the http servlets that quotes + html characters in the parameters, to prevent cross site scripting + attacks. (omalley) + + HADOOP-6274. Fix TestLocalFSFileContextMainOperations test failure. + (Gary Murry via suresh). + + HADOOP-6281. Avoid null pointer exceptions when the jsps don't have + paramaters (omalley) + + HADOOP-6285. Fix the result type of the getParameterMap method in the + HttpServer.QuotingInputFilter. (omalley) + + HADOOP-6286. Fix bugs in related to URI handling in glob methods in + FileContext. (Boris Shkolnik via suresh) + + HADOOP-6292. Update native libraries guide. (Corinne Chandel via cdouglas) + + HADOOP-6327. FileContext tests should not use /tmp and should clean up + files. (Sanjay Radia via szetszwo) + + HADOOP-6318. Upgrade to Avro 1.2.0. (cutting) + + HADOOP-6334. Fix GenericOptionsParser to understand URI for -files, + -libjars and -archives options and fix Path to support URI with fragment. + (Amareshwari Sriramadasu via szetszwo) + + HADOOP-6344. Fix rm and rmr immediately delete files rather than sending + to trash, if a user is over-quota. (Jakob Homan via suresh) + + HADOOP-6347. run-test-core-fault-inject runs a test case twice if + -Dtestcase is set (cos) + + HADOOP-6375. Sync documentation for FsShell du with its implementation. + (Todd Lipcon via cdouglas) + + HADOOP-6441. Protect web ui from cross site scripting attacks (XSS) on + the host http header and using encoded utf-7. (omalley) + + HADOOP-6451. Fix build to run contrib unit tests. (Tom White via cdouglas) + + HADOOP-6374. JUnit tests should never depend on anything in conf. + (Anatoli Fomenko via cos) + + HADOOP-6290. Prevent duplicate slf4j-simple jar via Avro's classpath. + (Owen O'Malley via cdouglas) + + HADOOP-6293. Fix FsShell -text to work on filesystems other than the + default. (cdouglas) + + HADOOP-6341. Fix test-patch.sh for checkTests function. (gkesavan) + + HADOOP-6314. Fix "fs -help" for the "-count" commond. (Ravi Phulari via + szetszwo) + + HADOOP-6405. Update Eclipse configuration to match changes to Ivy + configuration (Edwin Chan via cos) + + HADOOP-6411. Remove deprecated file src/test/hadoop-site.xml. (cos) + + HADOOP-6386. NameNode's HttpServer can't instantiate InetSocketAddress: + IllegalArgumentException is thrown (cos) + + HADOOP-6254. Slow reads cause s3n to fail with SocketTimeoutException. + (Andrew Hitchcock via tomwhite) + + HADOOP-6428. HttpServer sleeps with negative values. (cos) + + HADOOP-6414. Add command line help for -expunge command. + (Ravi Phulari via tomwhite) + + HADOOP-6391. Classpath should not be part of command line arguments. + (Cristian Ivascu via tomwhite) + + HADOOP-6462. Target "compile" does not exist in contrib/cloud. (tomwhite) + + HADOOP-6402. testConf.xsl is not well-formed XML. (Steve Loughran + via tomwhite) + + HADOOP-6489. Fix 3 findbugs warnings. (Erik Steffl via suresh) + + HADOOP-6517. Fix UserGroupInformation so that tokens are saved/retrieved + to/from the embedded Subject (Owen O'Malley & Kan Zhang via ddas) + + HADOOP-6538. Sets hadoop.security.authentication to simple by default. + (ddas) + + HADOOP-6540. Contrib unit tests have invalid XML for core-site, etc. + (Aaron Kimball via tomwhite) + + HADOOP-6521. User specified umask using deprecated dfs.umask must override + server configured using new dfs.umaskmode for backward compatibility. + (suresh) + + HADOOP-6522. Fix decoding of codepoint zero in UTF8. (cutting) + + HADOOP-6505. 
Use tr rather than sed to effect literal substitution in the + build script. (Allen Wittenauer via cdouglas) + + HADOOP-6548. Replace mortbay imports with commons logging. (cdouglas) + + HADOOP-6560. Handle invalid har:// uri in HarFileSystem. (szetszwo) + + HADOOP-6549. TestDoAsEffectiveUser should use ip address of the host + for superuser ip check(jnp via boryas) + + HADOOP-6570. RPC#stopProxy throws NPE if getProxyEngine(proxy) returns + null. (hairong) + + HADOOP-6558. Return null in HarFileSystem.getFileChecksum(..) since no + checksum algorithm is implemented. (szetszwo) + + HADOOP-6572. Makes sure that SASL encryption and push to responder + queue for the RPC response happens atomically. (Kan Zhang via ddas) + + HADOOP-6545. Changes the Key for the FileSystem cache to be UGI (ddas) + + HADOOP-6609. Fixed deadlock in RPC by replacing shared static + DataOutputBuffer in the UTF8 class with a thread local variable. (omalley) + + HADOOP-6504. Invalid example in the documentation of + org.apache.hadoop.util.Tool. (Benoit Sigoure via tomwhite) + + HADOOP-6546. BloomMapFile can return false negatives. (Clark Jefcoat + via tomwhite) + + HADOOP-6593. TextRecordInputStream doesn't close SequenceFile.Reader. + (Chase Bradford via tomwhite) + + HADOOP-6175. Incorrect version compilation with es_ES.ISO8859-15 locale + on Solaris 10. (Urko Benito via tomwhite) + + HADOOP-6645. Bugs on listStatus for HarFileSystem (rodrigo via mahadev) + + HADOOP-6645. Re: Bugs on listStatus for HarFileSystem (rodrigo via + mahadev) + + HADOOP-6654. Fix code example in WritableComparable javadoc. (Tom White + via szetszwo) + + HADOOP-6640. FileSystem.get() does RPC retries within a static + synchronized block. (hairong) + + HADOOP-6691. TestFileSystemCaching sometimes hangs. (hairong) + + HADOOP-6507. Hadoop Common Docs - delete 3 doc files that do not belong + under Common. (Corinne Chandel via tomwhite) + + HADOOP-6439. Fixes handling of deprecated keys to follow order in which + keys are defined. (V.V.Chaitanya Krishna via yhemanth) + + HADOOP-6690. FilterFileSystem correctly handles setTimes call. + (Rodrigo Schmidt via dhruba) + + HADOOP-6703. Prevent renaming a file, directory or symbolic link to + itself. (Eli Collins via suresh) + + HADOOP-6710. Symbolic umask for file creation is not conformant with posix. + (suresh) + + HADOOP-6719. Insert all missing methods in FilterFs. + (Rodrigo Schmidt via dhruba) + + HADOOP-6724. IPC doesn't properly handle IOEs thrown by socket factory. + (Todd Lipcon via tomwhite) + + HADOOP-6722. NetUtils.connect should check that it hasn't connected a socket + to itself. (Todd Lipcon via tomwhite) + + HADOOP-6634. Fix AccessControlList to use short names to verify access + control. (Vinod Kumar Vavilapalli via sharad) + + HADOOP-6709. Re-instate deprecated FileSystem methods that were removed + after 0.20. (tomwhite) + + HADOOP-6630. hadoop-config.sh fails to get executed if hadoop wrapper + scripts are in path. (Allen Wittenauer via tomwhite) + + HADOOP-6742. Add methods HADOOP-6709 from to TestFilterFileSystem. + (Eli Collins via tomwhite) + + HADOOP-6727. Remove UnresolvedLinkException from public FileContext APIs. + (Eli Collins via tomwhite) + + HADOOP-6631. Fix FileUtil.fullyDelete() to continue deleting other files + despite failure at any level. (Contributed by Ravi Gummadi and + Vinod Kumar Vavilapalli) + + HADOOP-6723. Unchecked exceptions thrown in IPC Connection should not + orphan clients. (Todd Lipcon via tomwhite) + + HADOOP-6404. 
Rename the generated artifacts to common instead of core. + (tomwhite) + + HADOOP-6461. Webapps aren't located correctly post-split. + (Todd Lipcon and Steve Loughran via tomwhite) + + HADOOP-6826. Revert FileSystem create method that takes CreateFlags. + (tomwhite) + + HADOOP-6800. Harmonize JAR library versions. (tomwhite) + + HADOOP-6847. Problem staging 0.21.0 artifacts to Apache Nexus Maven + Repository (Giridharan Kesavan via cos) + + HADOOP-6819. [Herriot] Shell command for getting the new exceptions in + the logs returning exitcode 1 after executing successfully. (Vinay Thota + via cos) + + HADOOP-6839. [Herriot] Implement a functionality for getting the user list + for creating proxy users. (Vinay Thota via cos) + + HADOOP-6836. [Herriot]: Generic method for adding/modifying the attributes + for new configuration. (Vinay Thota via cos) + + HADOOP-6860. 'compile-fault-inject' should never be called directly. + (Konstantin Boudnik) + + HADOOP-6790. Instrumented (Herriot) build uses too wide mask to include + aspect files. (Konstantin Boudnik) + + HADOOP-6875. [Herriot] Cleanup of temp. configurations is needed upon + restart of a cluster (Vinay Thota via cos) + +Release 0.20.3 - Unreleased + + NEW FEATURES + + HADOOP-6637. Benchmark for establishing RPC session. (shv) + + BUG FIXES + + HADOOP-6760. WebServer shouldn't increase port number in case of negative + port setting caused by Jetty's race (cos) + + HADOOP-6881. Make WritableComparator intialize classes when + looking for their raw comparator, as classes often register raw + comparators in initializers, which are no longer automatically run + in Java 6 when a class is referenced. (cutting via omalley) + + HADOOP-7072. Remove java5 dependencies from build. (cos) + +Release 0.20.204.0 - Unreleased + + NEW FEATURES + + HADOOP-6255. Create RPM and Debian packages for common. Changes deployment + layout to be consistent across the binary tgz, rpm, and deb. Adds setup + scripts for easy one node cluster configuration and user creation. + (Eric Yang via omalley) + +Release 0.20.203.0 - 2011-5-11 + + BUG FIXES + + HADOOP-7258. The Gzip codec should not return null decompressors. (omalley) + +Release 0.20.2 - 2010-2-16 + + NEW FEATURES + + HADOOP-6218. Adds a feature where TFile can be split by Record + Sequence number. (Hong Tang and Raghu Angadi via ddas) + + BUG FIXES + + HADOOP-6231. Allow caching of filesystem instances to be disabled on a + per-instance basis. (tomwhite) + + HADOOP-5759. Fix for IllegalArgumentException when CombineFileInputFormat + is used as job InputFormat. (Amareshwari Sriramadasu via dhruba) + + HADOOP-6097. Fix Path conversion in makeQualified and reset LineReader byte + count at the start of each block in Hadoop archives. (Ben Slusky, Tom + White, and Mahadev Konar via cdouglas) + + HADOOP-6269. Fix threading issue with defaultResource in Configuration. + (Sreekanth Ramakrishnan via cdouglas) + + HADOOP-6460. Reinitializes buffers used for serializing responses in ipc + server on exceeding maximum response size to free up Java heap. (suresh) + + HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in + GzipCodec. (Aaron Kimball via cdouglas) + + HADOOP-6498. IPC client bug may cause rpc call hang. (Ruyue Ma and + hairong via hairong) + + IMPROVEMENTS + + HADOOP-5611. Fix C++ libraries to build on Debian Lenny. (Todd Lipcon + via tomwhite) + + HADOOP-5612. Some c++ scripts are not chmodded before ant execution. + (Todd Lipcon via tomwhite) + + HADOOP-1849. 
Add undocumented configuration parameter for per handler + call queue size in IPC Server. (shv) + +Release 0.20.1 - 2009-09-01 + + INCOMPATIBLE CHANGES + + HADOOP-5726. Remove pre-emption from capacity scheduler code base. + (Rahul Kumar Singh via yhemanth) + + HADOOP-5881. Simplify memory monitoring and scheduling related + configuration. (Vinod Kumar Vavilapalli via yhemanth) + + NEW FEATURES + + HADOOP-6080. Introduce -skipTrash option to rm and rmr. + (Jakob Homan via shv) + + HADOOP-3315. Add a new, binary file foramt, TFile. (Hong Tang via cdouglas) + + IMPROVEMENTS + + HADOOP-5711. Change Namenode file close log to info. (szetszwo) + + HADOOP-5736. Update the capacity scheduler documentation for features + like memory based scheduling, job initialization and removal of pre-emption. + (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-5714. Add a metric for NameNode getFileInfo operation. (Jakob Homan + via szetszwo) + + HADOOP-4372. Improves the way history filenames are obtained and manipulated. + (Amar Kamat via ddas) + + HADOOP-5897. Add name-node metrics to capture java heap usage. + (Suresh Srinivas via shv) + + OPTIMIZATIONS + + BUG FIXES + + HADOOP-5691. Makes org.apache.hadoop.mapreduce.Reducer concrete class + instead of abstract. (Amareshwari Sriramadasu via sharad) + + HADOOP-5646. Fixes a problem in TestQueueCapacities. + (Vinod Kumar Vavilapalli via ddas) + + HADOOP-5655. TestMRServerPorts fails on java.net.BindException. (Devaraj + Das via hairong) + + HADOOP-5654. TestReplicationPolicy. fails on java.net.BindException. + (hairong) + + HADOOP-5688. Fix HftpFileSystem checksum path construction. (Tsz Wo + (Nicholas) Sze via cdouglas) + + HADOOP-4674. Fix fs help messages for -test, -text, -tail, -stat + and -touchz options. (Ravi Phulari via szetszwo) + + HADOOP-5718. Remove the check for the default queue in capacity scheduler. + (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-5719. Remove jobs that failed initialization from the waiting queue + in the capacity scheduler. (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-4744. Attaching another fix to the jetty port issue. The TaskTracker + kills itself if it ever discovers that the port to which jetty is actually + bound is invalid (-1). (ddas) + + HADOOP-5349. Fixes a problem in LocalDirAllocator to check for the return + path value that is returned for the case where the file we want to write + is of an unknown size. (Vinod Kumar Vavilapalli via ddas) + + HADOOP-5636. Prevents a job from going to RUNNING state after it has been + KILLED (this used to happen when the SetupTask would come back with a + success after the job has been killed). (Amar Kamat via ddas) + + HADOOP-5641. Fix a NullPointerException in capacity scheduler's memory + based scheduling code when jobs get retired. (yhemanth) + + HADOOP-5828. Use absolute path for mapred.local.dir of JobTracker in + MiniMRCluster. (yhemanth) + + HADOOP-4981. Fix capacity scheduler to schedule speculative tasks + correctly in the presence of High RAM jobs. + (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-5210. Solves a problem in the progress report of the reduce task. + (Ravi Gummadi via ddas) + + HADOOP-5850. Fixes a problem to do with not being able to jobs with + 0 maps/reduces. (Vinod K V via ddas) + + HADOOP-4626. Correct the API links in hdfs forrest doc so that they + point to the same version of hadoop. (szetszwo) + + HADOOP-5883. Fixed tasktracker memory monitoring to account for + momentary spurts in memory usage due to java's fork() model. 
+ (yhemanth) + + HADOOP-5539. Fixes a problem to do with not preserving intermediate + output compression for merged data. + (Jothi Padmanabhan and Billy Pearson via ddas) + + HADOOP-5932. Fixes a problem in capacity scheduler in computing + available memory on a tasktracker. + (Vinod Kumar Vavilapalli via yhemanth) + + HADOOP-5908. Fixes a problem to do with ArithmeticException in the + JobTracker when there are jobs with 0 maps. (Amar Kamat via ddas) + + HADOOP-5924. Fixes a corner case problem to do with job recovery with + empty history files. Also, after a JT restart, sends KillTaskAction to + tasks that report back but the corresponding job hasn't been initialized + yet. (Amar Kamat via ddas) + + HADOOP-5882. Fixes a reducer progress update problem for new mapreduce + api. (Amareshwari Sriramadasu via sharad) + + HADOOP-5746. Fixes a corner case problem in Streaming, where if an exception + happens in MROutputThread after the last call to the map/reduce method, the + exception goes undetected. (Amar Kamat via ddas) + + HADOOP-5884. Fixes accounting in capacity scheduler so that high RAM jobs + take more slots. (Vinod Kumar Vavilapalli via yhemanth) + + HADOOP-5937. Correct a safemode message in FSNamesystem. (Ravi Phulari + via szetszwo) + + HADOOP-5869. Fix bug in assignment of setup / cleanup task that was + causing TestQueueCapacities to fail. + (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-5921. Fixes a problem in the JobTracker where it sometimes never used + to come up due to a system file creation on JobTracker's system-dir failing. + This problem would sometimes show up only when the FS for the system-dir + (usually HDFS) is started at nearly the same time as the JobTracker. + (Amar Kamat via ddas) + + HADOOP-5920. Fixes a testcase failure for TestJobHistory. + (Amar Kamat via ddas) + + HADOOP-6139. Fix the FsShell help messages for rm and rmr. (Jakob Homan + via szetszwo) + + HADOOP-6145. Fix FsShell rm/rmr error messages when there is a FNFE. + (Jakob Homan via szetszwo) + + HADOOP-6150. Users should be able to instantiate comparator using TFile + API. (Hong Tang via rangadi) + +Release 0.20.0 - 2009-04-15 + + INCOMPATIBLE CHANGES + + HADOOP-4210. Fix findbugs warnings for equals implementations of mapred ID + classes. Removed public, static ID::read and ID::forName; made ID an + abstract class. (Suresh Srinivas via cdouglas) + + HADOOP-4253. Fix various warnings generated by findbugs. + Following deprecated methods in RawLocalFileSystem are removed: + public String getName() + public void lock(Path p, boolean shared) + public void release(Path p) + (Suresh Srinivas via johan) + + HADOOP-4618. Move http server from FSNamesystem into NameNode. + FSNamesystem.getNameNodeInfoPort() is removed. + FSNamesystem.getDFSNameNodeMachine() and FSNamesystem.getDFSNameNodePort() + replaced by FSNamesystem.getDFSNameNodeAddress(). + NameNode(bindAddress, conf) is removed. + (shv) + + HADOOP-4567. GetFileBlockLocations returns the NetworkTopology + information of the machines where the blocks reside. (dhruba) + + HADOOP-4435. The JobTracker WebUI displays the amount of heap memory + in use. (dhruba) + + HADOOP-4628. Move Hive into a standalone subproject. (omalley) + + HADOOP-4188. Removes task's dependency on concrete filesystems. + (Sharad Agarwal via ddas) + + HADOOP-1650. Upgrade to Jetty 6. (cdouglas) + + HADOOP-3986. Remove static Configuration from JobClient. 
(Amareshwari + Sriramadasu via cdouglas) + JobClient::setCommandLineConfig is removed + JobClient::getCommandLineConfig is removed + JobShell, TestJobShell classes are removed + + HADOOP-4422. S3 file systems should not create bucket. + (David Phillips via tomwhite) + + HADOOP-4035. Support memory based scheduling in capacity scheduler. + (Vinod Kumar Vavilapalli via yhemanth) + + HADOOP-3497. Fix bug in overly restrictive file globbing with a + PathFilter. (tomwhite) + + HADOOP-4445. Replace running task counts with running task + percentage in capacity scheduler UI. (Sreekanth Ramakrishnan via + yhemanth) + + HADOOP-4631. Splits the configuration into three parts - one for core, + one for mapred and the last one for HDFS. (Sharad Agarwal via cdouglas) + + HADOOP-3344. Fix libhdfs build to use autoconf and build the same + architecture (32 vs 64 bit) of the JVM running Ant. The libraries for + pipes, utils, and libhdfs are now all in c++//lib. + (Giridharan Kesavan via nigel) + + HADOOP-4874. Remove LZO codec because of licensing issues. (omalley) + + HADOOP-4970. The full path name of a file is preserved inside Trash. + (Prasad Chakka via dhruba) + + HADOOP-4103. NameNode keeps a count of missing blocks. It warns on + WebUI if there are such blocks. '-report' and '-metaSave' have extra + info to track such blocks. (Raghu Angadi) + + HADOOP-4783. Change permissions on history files on the jobtracker + to be only group readable instead of world readable. + (Amareshwari Sriramadasu via yhemanth) + + NEW FEATURES + + HADOOP-4575. Add a proxy service for relaying HsftpFileSystem requests. + Includes client authentication via user certificates and config-based + access control. (Kan Zhang via cdouglas) + + HADOOP-4661. Add DistCh, a new tool for distributed ch{mod,own,grp}. + (szetszwo) + + HADOOP-4709. Add several new features and bug fixes to Chukwa. + Added Hadoop Infrastructure Care Center (UI for visualize data collected + by Chukwa) + Added FileAdaptor for streaming small file in one chunk + Added compression to archive and demux output + Added unit tests and validation for agent, collector, and demux map + reduce job + Added database loader for loading demux output (sequence file) to jdbc + connected database + Added algorithm to distribute collector load more evenly + (Jerome Boulon, Eric Yang, Andy Konwinski, Ariel Rabkin via cdouglas) + + HADOOP-4179. Add Vaidya tool to analyze map/reduce job logs for performanc + problems. (Suhas Gogate via omalley) + + HADOOP-4029. Add NameNode storage information to the dfshealth page and + move DataNode information to a separated page. (Boris Shkolnik via + szetszwo) + + HADOOP-4348. Add service-level authorization for Hadoop. (acmurthy) + + HADOOP-4826. Introduce admin command saveNamespace. (shv) + + HADOOP-3063 BloomMapFile - fail-fast version of MapFile for sparsely + populated key space (Andrzej Bialecki via stack) + + HADOOP-1230. Add new map/reduce API and deprecate the old one. Generally, + the old code should work without problem. The new api is in + org.apache.hadoop.mapreduce and the old classes in org.apache.hadoop.mapred + are deprecated. Differences in the new API: + 1. All of the methods take Context objects that allow us to add new + methods without breaking compatability. + 2. Mapper and Reducer now have a "run" method that is called once and + contains the control loop for the task, which lets applications + replace it. + 3. Mapper and Reducer by default are Identity Mapper and Reducer. + 4. 
The FileOutputFormats use part-r-00000 for the output of reduce 0 and + part-m-00000 for the output of map 0. + 5. The reduce grouping comparator now uses the raw compare instead of + object compare. + 6. The number of maps in FileInputFormat is controlled by min and max + split size rather than min size and the desired number of maps. + (omalley) + + HADOOP-3305. Use Ivy to manage dependencies. (Giridharan Kesavan + and Steve Loughran via cutting) + + IMPROVEMENTS + + HADOOP-4749. Added a new counter REDUCE_INPUT_BYTES. (Yongqiang He via + zshao) + + HADOOP-4234. Fix KFS "glue" layer to allow applications to interface + with multiple KFS metaservers. (Sriram Rao via lohit) + + HADOOP-4245. Update to latest version of KFS "glue" library jar. + (Sriram Rao via lohit) + + HADOOP-4244. Change test-patch.sh to check Eclipse classpath no matter + it is run by Hudson or not. (szetszwo) + + HADOOP-3180. Add name of missing class to WritableName.getClass + IOException. (Pete Wyckoff via omalley) + + HADOOP-4178. Make the capacity scheduler's default values configurable. + (Sreekanth Ramakrishnan via omalley) + + HADOOP-4262. Generate better error message when client exception has null + message. (stevel via omalley) + + HADOOP-4226. Refactor and document LineReader to make it more readily + understandable. (Yuri Pradkin via cdouglas) + + HADOOP-4238. When listing jobs, if scheduling information isn't available + print NA instead of empty output. (Sreekanth Ramakrishnan via johan) + + HADOOP-4284. Support filters that apply to all requests, or global filters, + to HttpServer. (Kan Zhang via cdouglas) + + HADOOP-4276. Improve the hashing functions and deserialization of the + mapred ID classes. (omalley) + + HADOOP-4485. Add a compile-native ant task, as a shorthand. (enis) + + HADOOP-4454. Allow # comments in slaves file. (Rama Ramasamy via omalley) + + HADOOP-3461. Remove hdfs.StringBytesWritable. (szetszwo) + + HADOOP-4437. Use Halton sequence instead of java.util.Random in + PiEstimator. (szetszwo) + + HADOOP-4572. Change INode and its sub-classes to package private. + (szetszwo) + + HADOOP-4187. Does a runtime lookup for JobConf/JobConfigurable, and if + found, invokes the appropriate configure method. (Sharad Agarwal via ddas) + + HADOOP-4453. Improve ssl configuration and handling in HsftpFileSystem, + particularly when used with DistCp. (Kan Zhang via cdouglas) + + HADOOP-4583. Several code optimizations in HDFS. (Suresh Srinivas via + szetszwo) + + HADOOP-3923. Remove org.apache.hadoop.mapred.StatusHttpServer. (szetszwo) + + HADOOP-4622. Explicitly specify interpretor for non-native + pipes binaries. (Fredrik Hedberg via johan) + + HADOOP-4505. Add a unit test to test faulty setup task and cleanup + task killing the job. (Amareshwari Sriramadasu via johan) + + HADOOP-4608. Don't print a stack trace when the example driver gets an + unknown program to run. (Edward Yoon via omalley) + + HADOOP-4645. Package HdfsProxy contrib project without the extra level + of directories. (Kan Zhang via omalley) + + HADOOP-4126. Allow access to HDFS web UI on EC2 (tomwhite via omalley) + + HADOOP-4612. Removes RunJar's dependency on JobClient. + (Sharad Agarwal via ddas) + + HADOOP-4185. Adds setVerifyChecksum() method to FileSystem. + (Sharad Agarwal via ddas) + + HADOOP-4523. Prevent too many tasks scheduled on a node from bringing + it down by monitoring for cumulative memory usage across tasks. + (Vinod Kumar Vavilapalli via yhemanth) + + HADOOP-4640. 
Adds an input format that can split lzo compressed + text files. (johan) + + HADOOP-4666. Launch reduces only after a few maps have run in the + Fair Scheduler. (Matei Zaharia via johan) + + HADOOP-4339. Remove redundant calls from FileSystem/FsShell when + generating/processing ContentSummary. (David Phillips via cdouglas) + + HADOOP-2774. Add counters tracking records spilled to disk in MapTask and + ReduceTask. (Ravi Gummadi via cdouglas) + + HADOOP-4513. Initialize jobs asynchronously in the capacity scheduler. + (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-4649. Improve abstraction for spill indices. (cdouglas) + + HADOOP-3770. Add gridmix2, an iteration on the gridmix benchmark. (Runping + Qi via cdouglas) + + HADOOP-4708. Add support for dfsadmin commands in TestCLI. (Boris Shkolnik + via cdouglas) + + HADOOP-4758. Add a splitter for metrics contexts to support more than one + type of collector. (cdouglas) + + HADOOP-4722. Add tests for dfsadmin quota error messages. (Boris Shkolnik + via cdouglas) + + HADOOP-4690. fuse-dfs - create source file/function + utils + config + + main source files. (pete wyckoff via mahadev) + + HADOOP-3750. Fix and enforce module dependencies. (Sharad Agarwal via + tomwhite) + + HADOOP-4747. Speed up FsShell::ls by removing redundant calls to the + filesystem. (David Phillips via cdouglas) + + HADOOP-4305. Improves the blacklisting strategy, whereby, tasktrackers + that are blacklisted are not given tasks to run from other jobs, subject + to the following conditions (all must be met): + 1) The TaskTracker has been blacklisted by at least 4 jobs (configurable) + 2) The TaskTracker has been blacklisted 50% more number of times than + the average (configurable) + 3) The cluster has less than 50% trackers blacklisted + Once in 24 hours, a TaskTracker blacklisted for all jobs is given a chance. + Restarting the TaskTracker moves it out of the blacklist. + (Amareshwari Sriramadasu via ddas) + + HADOOP-4688. Modify the MiniMRDFSSort unit test to spill multiple times, + exercising the map-side merge code. (cdouglas) + + HADOOP-4737. Adds the KILLED notification when jobs get killed. + (Amareshwari Sriramadasu via ddas) + + HADOOP-4728. Add a test exercising different namenode configurations. + (Boris Shkolnik via cdouglas) + + HADOOP-4807. Adds JobClient commands to get the active/blacklisted tracker + names. Also adds commands to display running/completed task attempt IDs. + (ddas) + + HADOOP-4699. Remove checksum validation from map output servlet. (cdouglas) + + HADOOP-4838. Added a registry to automate metrics and mbeans management. + (Sanjay Radia via acmurthy) + + HADOOP-3136. Fixed the default scheduler to assign multiple tasks to each + tasktracker per heartbeat, when feasible. To ensure locality isn't hurt + too badly, the scheudler will not assign more than one off-switch task per + heartbeat. The heartbeat interval is also halved since the task-tracker is + fixed to no longer send out heartbeats on each task completion. A + slow-start for scheduling reduces is introduced to ensure that reduces + aren't started till sufficient number of maps are done, else reduces of + jobs whose maps aren't scheduled might swamp the cluster. + Configuration changes to mapred-default.xml: + add mapred.reduce.slowstart.completed.maps + (acmurthy) + + HADOOP-4545. Add example and test case of secondary sort for the reduce. + (omalley) + + HADOOP-4753. Refactor gridmix2 to reduce code duplication. (cdouglas) + + HADOOP-4909. 
Fix Javadoc and make some of the API more consistent in their + use of the JobContext instead of Configuration. (omalley) + + HADOOP-4920. Stop storing Forrest output in Subversion. (cutting) + + HADOOP-4948. Add parameters java5.home and forrest.home to the ant commands + in test-patch.sh. (Giridharan Kesavan via szetszwo) + + HADOOP-4830. Add end-to-end test cases for testing queue capacities. + (Vinod Kumar Vavilapalli via yhemanth) + + HADOOP-4980. Improve code layout of capacity scheduler to make it + easier to fix some blocker bugs. (Vivek Ratan via yhemanth) + + HADOOP-4916. Make user/location of Chukwa installation configurable by an + external properties file. (Eric Yang via cdouglas) + + HADOOP-4950. Make the CompressorStream, DecompressorStream, + BlockCompressorStream, and BlockDecompressorStream public to facilitate + non-Hadoop codecs. (omalley) + + HADOOP-4843. Collect job history and configuration in Chukwa. (Eric Yang + via cdouglas) + + HADOOP-5030. Build Chukwa RPM to install into configured directory. (Eric + Yang via cdouglas) + + HADOOP-4828. Updates documents to do with configuration (HADOOP-4631). + (Sharad Agarwal via ddas) + + HADOOP-4939. Adds a test that would inject random failures for tasks in + large jobs and would also inject TaskTracker failures. (ddas) + + HADOOP-4944. A configuration file can include other configuration + files. (Rama Ramasamy via dhruba) + + HADOOP-4804. Provide Forrest documentation for the Fair Scheduler. + (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-5248. A testcase that checks for the existence of job directory + after the job completes. Fails if it exists. (ddas) + + HADOOP-4664. Introduces multiple job initialization threads, where the + number of threads are configurable via mapred.jobinit.threads. + (Matei Zaharia and Jothi Padmanabhan via ddas) + + HADOOP-4191. Adds a testcase for JobHistory. (Ravi Gummadi via ddas) + + HADOOP-5466. Change documenation CSS style for headers and code. (Corinne + Chandel via szetszwo) + + HADOOP-5275. Add ivy directory and files to built tar. + (Giridharan Kesavan via nigel) + + HADOOP-5468. Add sub-menus to forrest documentation and make some minor + edits. (Corinne Chandel via szetszwo) + + HADOOP-5437. Fix TestMiniMRDFSSort to properly test jvm-reuse. (omalley) + + HADOOP-5521. Removes dependency of TestJobInProgress on RESTART_COUNT + JobHistory tag. (Ravi Gummadi via ddas) + + OPTIMIZATIONS + + HADOOP-3293. Fixes FileInputFormat to do provide locations for splits + based on the rack/host that has the most number of bytes. + (Jothi Padmanabhan via ddas) + + HADOOP-4683. Fixes Reduce shuffle scheduler to invoke + getMapCompletionEvents in a separate thread. (Jothi Padmanabhan + via ddas) + + BUG FIXES + + HADOOP-4204. Fix findbugs warnings related to unused variables, naive + Number subclass instantiation, Map iteration, and badly scoped inner + classes. (Suresh Srinivas via cdouglas) + + HADOOP-4207. Update derby jar file to release 10.4.2 release. + (Prasad Chakka via dhruba) + + HADOOP-4325. SocketInputStream.read() should return -1 in case EOF. + (Raghu Angadi) + + HADOOP-4408. FsAction functions need not create new objects. (cdouglas) + + HADOOP-4440. TestJobInProgressListener tests for jobs killed in queued + state (Amar Kamat via ddas) + + HADOOP-4346. Implement blocking connect so that Hadoop is not affected + by selector problem with JDK default implementation. (Raghu Angadi) + + HADOOP-4388. 
If there are invalid blocks in the transfer list, Datanode + should handle them and keep transferring the remaining blocks. (Suresh + Srinivas via szetszwo) + + HADOOP-4587. Fix a typo in Mapper javadoc. (Koji Noguchi via szetszwo) + + HADOOP-4530. In fsck, HttpServletResponse sendError fails with + IllegalStateException. (hairong) + + HADOOP-4377. Fix a race condition in directory creation in + NativeS3FileSystem. (David Phillips via cdouglas) + + HADOOP-4621. Fix javadoc warnings caused by duplicate jars. (Kan Zhang via + cdouglas) + + HADOOP-4566. Deploy new hive code to support more types. + (Zheng Shao via dhruba) + + HADOOP-4571. Add chukwa conf files to svn:ignore list. (Eric Yang via + szetszwo) + + HADOOP-4589. Correct PiEstimator output messages and improve the code + readability. (szetszwo) + + HADOOP-4650. Correct a mismatch between the default value of + local.cache.size in the config and the source. (Jeff Hammerbacher via + cdouglas) + + HADOOP-4606. Fix cygpath error if the log directory does not exist. + (szetszwo via omalley) + + HADOOP-4141. Fix bug in ScriptBasedMapping causing potential infinite + loop on misconfigured hadoop-site. (Aaron Kimball via tomwhite) + + HADOOP-4691. Correct a link in the javadoc of IndexedSortable. (szetszwo) + + HADOOP-4598. '-setrep' command skips under-replicated blocks. (hairong) + + HADOOP-4429. Set defaults for user, group in UnixUserGroupInformation so + login fails more predictably when misconfigured. (Alex Loddengaard via + cdouglas) + + HADOOP-4676. Fix broken URL in blacklisted tasktrackers page. (Amareshwari + Sriramadasu via cdouglas) + + HADOOP-3422 Ganglia counter metrics are all reported with the metric + name "value", so the counter values can not be seen. (Jason Attributor + and Brian Bockelman via stack) + + HADOOP-4704. Fix javadoc typos "the the". (szetszwo) + + HADOOP-4677. Fix semantics of FileSystem::getBlockLocations to return + meaningful values. (Hong Tang via cdouglas) + + HADOOP-4669. Use correct operator when evaluating whether access time is + enabled (Dhruba Borthakur via cdouglas) + + HADOOP-4732. Pass connection and read timeouts in the correct order when + setting up fetch in reduce. (Amareshwari Sriramadasu via cdouglas) + + HADOOP-4558. Fix capacity reclamation in capacity scheduler. + (Amar Kamat via yhemanth) + + HADOOP-4770. Fix rungridmix_2 script to work with RunJar. (cdouglas) + + HADOOP-4738. When using git, the saveVersion script will use only the + commit hash for the version and not the message, which requires escaping. + (cdouglas) + + HADOOP-4576. Show pending job count instead of task count in the UI per + queue in capacity scheduler. (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-4623. Maintain running tasks even if speculative execution is off. + (Amar Kamat via yhemanth) + + HADOOP-4786. Fix broken compilation error in + TestTrackerBlacklistAcrossJobs. (yhemanth) + + HADOOP-4785. Fixes theJobTracker heartbeat to not make two calls to + System.currentTimeMillis(). (Amareshwari Sriramadasu via ddas) + + HADOOP-4792. Add generated Chukwa configuration files to version control + ignore lists. (cdouglas) + + HADOOP-4796. Fix Chukwa test configuration, remove unused components. (Eric + Yang via cdouglas) + + HADOOP-4708. Add binaries missed in the initial checkin for Chukwa. (Eric + Yang via cdouglas) + + HADOOP-4805. Remove black list collector from Chukwa Agent HTTP Sender. + (Eric Yang via cdouglas) + + HADOOP-4837. 
Move HADOOP_CONF_DIR configuration to chukwa-env.sh (Jerome + Boulon via cdouglas) + + HADOOP-4825. Use ps instead of jps for querying process status in Chukwa. + (Eric Yang via cdouglas) + + HADOOP-4844. Fixed javadoc for + org.apache.hadoop.fs.permission.AccessControlException to document that + it's deprecated in favour of + org.apache.hadoop.security.AccessControlException. (acmurthy) + + HADOOP-4706. Close the underlying output stream in + IFileOutputStream::close. (Jothi Padmanabhan via cdouglas) + + HADOOP-4855. Fixed command-specific help messages for refreshServiceAcl in + DFSAdmin and MRAdmin. (acmurthy) + + HADOOP-4820. Remove unused method FSNamesystem::deleteInSafeMode. (Suresh + Srinivas via cdouglas) + + HADOOP-4698. Lower io.sort.mb to 10 in the tests and raise the junit memory + limit to 512m from 256m. (Nigel Daley via cdouglas) + + HADOOP-4860. Split TestFileTailingAdapters into three separate tests to + avoid contention. (Eric Yang via cdouglas) + + HADOOP-3921. Fixed clover (code coverage) target to work with JDK 6. + (tomwhite via nigel) + + HADOOP-4845. Modify the reduce input byte counter to record only the + compressed size and add a human-readable label. (Yongqiang He via cdouglas) + + HADOOP-4458. Add a test creating symlinks in the working directory. + (Amareshwari Sriramadasu via cdouglas) + + HADOOP-4879. Fix org.apache.hadoop.mapred.Counters to correctly define + Object.equals rather than depend on contentEquals api. (omalley via + acmurthy) + + HADOOP-4791. Fix rpm build process for Chukwa. (Eric Yang via cdouglas) + + HADOOP-4771. Correct initialization of the file count for directories + with quotas. (Ruyue Ma via shv) + + HADOOP-4878. Fix eclipse plugin classpath file to point to ivy's resolved + lib directory and added the same to test-patch.sh. (Giridharan Kesavan via + acmurthy) + + HADOOP-4774. Fix default values of some capacity scheduler configuration + items which would otherwise not work on a fresh checkout. + (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-4876. Fix capacity scheduler reclamation by updating count of + pending tasks correctly. (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-4849. Documentation for Service Level Authorization implemented in + HADOOP-4348. (acmurthy) + + HADOOP-4827. Replace Consolidator with Aggregator macros in Chukwa (Eric + Yang via cdouglas) + + HADOOP-4894. Correctly parse ps output in Chukwa jettyCollector.sh. (Ari + Rabkin via cdouglas) + + HADOOP-4892. Close fds out of Chukwa ExecPlugin. (Ari Rabkin via cdouglas) + + HADOOP-4889. Fix permissions in RPM packaging. (Eric Yang via cdouglas) + + HADOOP-4869. Fixes the TT-JT heartbeat to have an explicit flag for + restart apart from the initialContact flag that there was earlier. + (Amareshwari Sriramadasu via ddas) + + HADOOP-4716. Fixes ReduceTask.java to clear out the mapping between + hosts and MapOutputLocation upon a JT restart (Amar Kamat via ddas) + + HADOOP-4880. Removes an unnecessary testcase from TestJobTrackerRestart. + (Amar Kamat via ddas) + + HADOOP-4924. Fixes a race condition in TaskTracker re-init. (ddas) + + HADOOP-4854. Read reclaim capacity interval from capacity scheduler + configuration. (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-4896. HDFS Fsck does not load HDFS configuration. (Raghu Angadi) + + HADOOP-4956. Creates TaskStatus for failed tasks with an empty Counters + object instead of null. (ddas) + + HADOOP-4979. Fix capacity scheduler to block cluster for failed high + RAM requirements across task types. 
(Vivek Ratan via yhemanth) + + HADOOP-4949. Fix native compilation. (Chris Douglas via acmurthy) + + HADOOP-4787. Fixes the testcase TestTrackerBlacklistAcrossJobs which was + earlier failing randomly. (Amareshwari Sriramadasu via ddas) + + HADOOP-4914. Add description fields to Chukwa init.d scripts (Eric Yang via + cdouglas) + + HADOOP-4884. Make tool tip date format match standard HICC format. (Eric + Yang via cdouglas) + + HADOOP-4925. Make Chukwa sender properties configurable. (Ari Rabkin via + cdouglas) + + HADOOP-4947. Make Chukwa command parsing more forgiving of whitespace. (Ari + Rabkin via cdouglas) + + HADOOP-5026. Make chukwa/bin scripts executable in repository. (Andy + Konwinski via cdouglas) + + HADOOP-4977. Fix a deadlock between the reclaimCapacity and assignTasks + in capacity scheduler. (Vivek Ratan via yhemanth) + + HADOOP-4988. Fix reclaim capacity to work even when there are queues with + no capacity. (Vivek Ratan via yhemanth) + + HADOOP-5065. Remove generic parameters from argument to + setIn/OutputFormatClass so that it works with SequenceIn/OutputFormat. + (cdouglas via omalley) + + HADOOP-4818. Pass user config to instrumentation API. (Eric Yang via + cdouglas) + + HADOOP-4993. Fix Chukwa agent configuration and startup to make it both + more modular and testable. (Ari Rabkin via cdouglas) + + HADOOP-5048. Fix capacity scheduler to correctly cleanup jobs that are + killed after initialization, but before running. + (Sreekanth Ramakrishnan via yhemanth) + + HADOOP-4671. Mark loop control variables shared between threads as + volatile. (cdouglas) + + HADOOP-5079. HashFunction inadvertently destroys some randomness + (Jonathan Ellis via stack) + + HADOOP-4999. A failure to write to FsEditsLog results in + IndexOutOfBounds exception. (Boris Shkolnik via rangadi) + + HADOOP-5139. Catch IllegalArgumentException during metrics registration + in RPC. (Hairong Kuang via szetszwo) + + HADOOP-5085. Copying a file to local with Crc throws an exception. + (hairong) + + HADOOP-5211. Fix check for job completion in TestSetupAndCleanupFailure. + (enis) + + HADOOP-5254. The Configuration class should be able to work with XML + parsers that do not support xmlinclude. (Steve Loughran via dhruba) + + HADOOP-4692. Namenode in infinite loop for replicating/deleting corrupt + blocks. (hairong) + + HADOOP-5255. Fix use of Math.abs to avoid overflow. (Jonathan Ellis via + cdouglas) + + HADOOP-5269. Fixes a problem to do with tasktracker holding on to + FAILED_UNCLEAN or KILLED_UNCLEAN tasks forever. (Amareshwari Sriramadasu + via ddas) + + HADOOP-5214. Fixes a ConcurrentModificationException while the Fairshare + Scheduler accesses the tasktrackers stored by the JobTracker. + (Rahul Kumar Singh via yhemanth) + + HADOOP-5233. Addresses the three issues - Race condition in updating + status, NPE in TaskTracker task localization when the conf file is missing + (HADOOP-5234) and NPE in handling KillTaskAction of a cleanup task + (HADOOP-5235). (Amareshwari Sriramadasu via ddas) + + HADOOP-5247. Introduces a broadcast of KillJobAction to all trackers when + a job finishes. This fixes a bunch of problems to do with NPE when a + completed job is not in memory and a tasktracker comes to the jobtracker + with a status report of a task belonging to that job. (Amar Kamat via ddas) + + HADOOP-5282. Fixed job history logs for task attempts that are + failed by the JobTracker, say due to lost task trackers. (Amar + Kamat via yhemanth) + + HADOOP-5241. Fixes a bug in disk-space resource estimation. 
Makes + the estimation formula linear where blowUp = + Total-Output/Total-Input. (Sharad Agarwal via ddas) + + HADOOP-5142. Fix MapWritable#putAll to store key/value classes. + (Doğacan Güney via enis) + + HADOOP-4744. Workaround for jetty6 returning -1 when getLocalPort + is invoked on the connector. The workaround patch retries a few + times before failing. (Jothi Padmanabhan via yhemanth) + + HADOOP-5280. Adds a check to prevent a task state transition from + FAILED to any of UNASSIGNED, RUNNING, COMMIT_PENDING or + SUCCEEDED. (ddas) + + HADOOP-5272. Fixes a problem to do with detecting whether an + attempt is the first attempt of a Task. This affects JobTracker + restart. (Amar Kamat via ddas) + + HADOOP-5306. Fixes a problem to do with logging/parsing the http port of a + lost tracker. Affects JobTracker restart. (Amar Kamat via ddas) + + HADOOP-5111. Fix Job::set* methods to work with generics. (cdouglas) + + HADOOP-5274. Fix gridmix2 dependency on wordcount example. (cdouglas) + + HADOOP-5145. Balancer sometimes runs out of memory after running + days or weeks. (hairong) + + HADOOP-5338. Fix jobtracker restart to clear task completion + events cached by tasktrackers forcing them to fetch all events + afresh, thus avoiding missed task completion events on the + tasktrackers. (Amar Kamat via yhemanth) + + HADOOP-4695. Change TestGlobalFilter so that it allows a web page to be + filtered more than once for a single access. (Kan Zhang via szetszwo) + + HADOOP-5298. Change TestServletFilter so that it allows a web page to be + filtered more than once for a single access. (szetszwo) + + HADOOP-5432. Disable ssl during unit tests in hdfsproxy, as it is unused + and causes failures. (cdouglas) + + HADOOP-5416. Correct the shell command "fs -test" forrest doc description. + (Ravi Phulari via szetszwo) + + HADOOP-5327. Fixed job tracker to remove files from system directory on + ACL check failures and also check ACLs on restart. + (Amar Kamat via yhemanth) + + HADOOP-5395. Change the exception message when a job is submitted to an + invalid queue. (Rahul Kumar Singh via yhemanth) + + HADOOP-5276. Fixes a problem to do with updating the start time of + a task when the tracker that ran the task is lost. (Amar Kamat via + ddas) + + HADOOP-5278. Fixes a problem to do with logging the finish time of + a task during recovery (after a JobTracker restart). (Amar Kamat + via ddas) + + HADOOP-5490. Fixes a synchronization problem in the + EagerTaskInitializationListener class. (Jothi Padmanabhan via + ddas) + + HADOOP-5493. The shuffle copier threads return the codecs back to + the pool when the shuffle completes. (Jothi Padmanabhan via ddas) + + HADOOP-5414. Fixes IO exception while executing hadoop fs -touchz + fileName by making sure that lease renewal thread exits before dfs + client exits. (hairong) + + HADOOP-5103. FileInputFormat now reuses the clusterMap network + topology object and that brings down the log messages in the + JobClient to do with NetworkTopology.add significantly. (Jothi + Padmanabhan via ddas) + + HADOOP-5483. Fixes a problem in the Directory Cleanup Thread due to which + TestMiniMRWithDFS sometimes used to fail. (ddas) + + HADOOP-5281. Prevent sharing incompatible ZlibCompressor instances between + GzipCodec and DefaultCodec. (cdouglas) + + HADOOP-5463. Balancer throws "Not a host:port pair" unless port is + specified in fs.default.name. (Stuart White via hairong) + + HADOOP-5514. Fix JobTracker metrics and add metrics for waiting, failed + tasks. (cdouglas) + + HADOOP-5516.
Fix NullPointerException in TaskMemoryManagerThread + that comes when monitored processes disappear when the thread is + running. (Vinod Kumar Vavilapalli via yhemanth) + + HADOOP-5382. Support combiners in the new context object API. (omalley) + + HADOOP-5471. Fixes a problem to do with updating the log.index file in the + case where a cleanup task is run. (Amareshwari Sriramadasu via ddas) + + HADOOP-5534. Fixed a deadlock in Fair scheduler's servlet. + (Rahul Kumar Singh via yhemanth) + + HADOOP-5328. Fixes a problem in the renaming of job history files during + job recovery. (Amar Kamat via ddas) + + HADOOP-5417. Don't ignore InterruptedExceptions that happen when calling + into rpc. (omalley) + + HADOOP-5320. Add a close() in TestMapReduceLocal. (Jothi Padmanabhan + via szetszwo) + + HADOOP-5520. Fix a typo in disk quota help message. (Ravi Phulari + via szetszwo) + + HADOOP-5519. Remove claims from mapred-default.xml that prime numbers + of tasks are helpful. (Owen O'Malley via szetszwo) + + HADOOP-5484. TestRecoveryManager fails wtih FileAlreadyExistsException. + (Amar Kamat via hairong) + + HADOOP-5564. Limit the JVM heap size in the java command for initializing + JAVA_PLATFORM. (Suresh Srinivas via szetszwo) + + HADOOP-5565. Add API for failing/finalized jobs to the JT metrics + instrumentation. (Jerome Boulon via cdouglas) + + HADOOP-5390. Remove duplicate jars from tarball, src from binary tarball + added by hdfsproxy. (Zhiyong Zhang via cdouglas) + + HADOOP-5066. Building binary tarball should not build docs/javadocs, copy + src, or run jdiff. (Giridharan Kesavan via cdouglas) + + HADOOP-5459. Fix undetected CRC errors where intermediate output is closed + before it has been completely consumed. (cdouglas) + + HADOOP-5571. Remove widening primitive conversion in TupleWritable mask + manipulation. (Jingkei Ly via cdouglas) + + HADOOP-5588. Remove an unnecessary call to listStatus(..) in + FileSystem.globStatusInternal(..). (Hairong Kuang via szetszwo) + + HADOOP-5473. Solves a race condition in killing a task - the state is KILLED + if there is a user request pending to kill the task and the TT reported + the state as SUCCESS. (Amareshwari Sriramadasu via ddas) + + HADOOP-5576. Fix LocalRunner to work with the new context object API in + mapreduce. (Tom White via omalley) + + HADOOP-4374. Installs a shutdown hook in the Task JVM so that log.index is + updated before the JVM exits. Also makes the update to log.index atomic. + (Ravi Gummadi via ddas) + + HADOOP-5577. Add a verbose flag to mapreduce.Job.waitForCompletion to get + the running job's information printed to the user's stdout as it runs. + (omalley) + + HADOOP-5607. Fix NPE in TestCapacityScheduler. (cdouglas) + + HADOOP-5605. All the replicas incorrectly got marked as corrupt. (hairong) + + HADOOP-5337. JobTracker, upon restart, now waits for the TaskTrackers to + join back before scheduling new tasks. This fixes race conditions associated + with greedy scheduling as was the case earlier. (Amar Kamat via ddas) + + HADOOP-5227. Fix distcp so -update and -delete can be meaningfully + combined. (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-5305. Increase number of files and print debug messages in + TestCopyFiles. (szetszwo) + + HADOOP-5548. Add synchronization for JobTracker methods in RecoveryManager. + (Amareshwari Sriramadasu via sharad) + + HADOOP-3810. NameNode seems unstable on a cluster with little space left. + (hairong) + + HADOOP-5068. Fix NPE in TestCapacityScheduler. 
(Vinod Kumar Vavilapalli + via szetszwo) + + HADOOP-5585. Clear FileSystem statistics between tasks when jvm-reuse + is enabled. (omalley) + + HADOOP-5394. JobTracker might schedule 2 attempts of the same task + with the same attempt id across restarts. (Amar Kamat via sharad) + + HADOOP-5645. After HADOOP-4920 we need a place to checkin + releasenotes.html. (nigel) + +Release 0.19.2 - 2009-06-30 + + BUG FIXES + + HADOOP-5154. Fixes a deadlock in the fairshare scheduler. + (Matei Zaharia via yhemanth) + + HADOOP-5146. Fixes a race condition that causes LocalDirAllocator to miss + files. (Devaraj Das via yhemanth) + + HADOOP-4638. Fixes job recovery to not crash the job tracker for problems + with a single job file. (Amar Kamat via yhemanth) + + HADOOP-5384. Fix a problem that DataNodeCluster creates blocks with + generationStamp == 1. (szetszwo) + + HADOOP-5376. Fixes the code handling lost tasktrackers to set the task state + to KILLED_UNCLEAN only for relevant type of tasks. + (Amareshwari Sriramadasu via yhemanth) + + HADOOP-5285. Fixes the issues - (1) obtainTaskCleanupTask checks whether job is + inited before trying to lock the JobInProgress (2) Moves the CleanupQueue class + outside the TaskTracker and makes it a generic class that is used by the + JobTracker also for deleting the paths on the job's output fs. (3) Moves the + references to completedJobStore outside the block where the JobTracker is locked. + (ddas) + + HADOOP-5392. Fixes a problem to do with JT crashing during recovery when + the job files are garbled. (Amar Kamat via ddas) + + HADOOP-5332. Appending to files is not allowed (by default) unless + dfs.support.append is set to true. (dhruba) + + HADOOP-5333. libhdfs supports appending to files. (dhruba) + + HADOOP-3998. Fix dfsclient exception when JVM is shutdown. (dhruba) + + HADOOP-5440. Fixes a problem to do with removing a taskId from the list + of taskIds that the TaskTracker's TaskMemoryManager manages. + (Amareshwari Sriramadasu via ddas) + + HADOOP-5446. Restore TaskTracker metrics. (cdouglas) + + HADOOP-5449. Fixes the history cleaner thread. + (Amareshwari Sriramadasu via ddas) + + HADOOP-5479. NameNode should not send empty block replication request to + DataNode. (hairong) + + HADOOP-5259. Job with output hdfs:/user//outputpath (no + authority) fails with Wrong FS. (Doug Cutting via hairong) + + HADOOP-5522. Documents the setup/cleanup tasks in the mapred tutorial. + (Amareshwari Sriramadasu via ddas) + + HADOOP-5549. ReplicationMonitor should schedule both replication and + deletion work in one iteration. (hairong) + + HADOOP-5554. DataNodeCluster and CreateEditsLog should create blocks with + the same generation stamp value. (hairong via szetszwo) + + HADOOP-5231. Clones the TaskStatus before passing it to the JobInProgress. + (Amareshwari Sriramadasu via ddas) + + HADOOP-4719. Fix documentation of 'ls' format for FsShell. (Ravi Phulari + via cdouglas) + + HADOOP-5374. Fixes a NPE problem in getTasksToSave method. + (Amareshwari Sriramadasu via ddas) + + HADOOP-4780. Cache the size of directories in DistributedCache, avoiding + long delays in recalculating it. (He Yongqiang via cdouglas) + + HADOOP-5551. Prevent directory destruction on file create. + (Brian Bockelman via shv) + + HADOOP-5671. Fix FNF exceptions when copying from old versions of + HftpFileSystem. (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-5213. Fix Null pointer exception caused when bzip2compression + was used and user closed a output stream without writing any data. 
+ (Zheng Shao via dhruba) + + HADOOP-5579. Set errno correctly in libhdfs for permission, quota, and FNF + conditions. (Brian Bockelman via cdouglas) + + HADOOP-5816. Fixes a problem in the KeyFieldBasedComparator to do with + ArrayIndexOutOfBounds exception. (He Yongqiang via ddas) + + HADOOP-5951. Add Apache license header to StorageInfo.java. (Suresh + Srinivas via szetszwo) + +Release 0.19.1 - 2009-02-23 + + IMPROVEMENTS + + HADOOP-4739. Fix spelling and grammar, improve phrasing of some sections in + mapred tutorial. (Vivek Ratan via cdouglas) + + HADOOP-3894. DFSClient logging improvements. (Steve Loughran via shv) + + HADOOP-5126. Remove empty file BlocksWithLocations.java (shv) + + HADOOP-5127. Remove public methods in FSDirectory. (Jakob Homan via shv) + + BUG FIXES + + HADOOP-4697. Fix getBlockLocations in KosmosFileSystem to handle multiple + blocks correctly. (Sriram Rao via cdouglas) + + HADOOP-4420. Add null checks for job, caused by invalid job IDs. + (Aaron Kimball via tomwhite) + + HADOOP-4632. Fix TestJobHistoryVersion to use test.build.dir instead of the + current working directory for scratch space. (Amar Kamat via cdouglas) + + HADOOP-4508. Fix FSDataOutputStream.getPos() for append. (dhruba via + szetszwo) + + HADOOP-4727. Fix a group checking bug in fill_stat_structure(...) in + fuse-dfs. (Brian Bockelman via szetszwo) + + HADOOP-4836. Correct typos in mapred related documentation. (Jordà Polo + via szetszwo) + + HADOOP-4821. Usage description in the Quotas guide documentations are + incorrect. (Boris Shkolnik via hairong) + + HADOOP-4847. Moves the loading of OutputCommitter to the Task. + (Amareshwari Sriramadasu via ddas) + + HADOOP-4966. Marks completed setup tasks for removal. + (Amareshwari Sriramadasu via ddas) + + HADOOP-4982. TestFsck should run in Eclipse. (shv) + + HADOOP-5008. TestReplication#testPendingReplicationRetry leaves an opened + fd unclosed. (hairong) + + HADOOP-4906. Fix TaskTracker OOM by keeping a shallow copy of JobConf in + TaskTracker.TaskInProgress. (Sharad Agarwal via acmurthy) + + HADOOP-4918. Fix bzip2 compression to work with Sequence Files. + (Zheng Shao via dhruba). + + HADOOP-4965. TestFileAppend3 should close FileSystem. (shv) + + HADOOP-4967. Fixes a race condition in the JvmManager to do with killing + tasks. (ddas) + + HADOOP-5009. DataNode#shutdown sometimes leaves data block scanner + verification log unclosed. (hairong) + + HADOOP-5086. Use the appropriate FileSystem for trash URIs. (cdouglas) + + HADOOP-4955. Make DBOutputFormat use column names from setOutput(). + (Kevin Peterson via enis) + + HADOOP-4862. Minor : HADOOP-3678 did not remove all the cases of + spurious IOExceptions logged by DataNode. (Raghu Angadi) + + HADOOP-5034. NameNode should send both replication and deletion requests + to DataNode in one reply to a heartbeat. (hairong) + + HADOOP-4759. Removes temporary output directory for failed and killed + tasks by launching special CLEANUP tasks for the same. + (Amareshwari Sriramadasu via ddas) + + HADOOP-5161. Accepted sockets do not get placed in + DataXceiverServer#childSockets. (hairong) + + HADOOP-5193. Correct calculation of edits modification time. (shv) + + HADOOP-4494. Allow libhdfs to append to files. + (Pete Wyckoff via dhruba) + + HADOOP-5166. Fix JobTracker restart to work when ACLs are configured + for the JobTracker. (Amar Kamat via yhemanth). + + HADOOP-5067. Fixes TaskInProgress.java to keep track of count of failed and + killed tasks correctly.
(Amareshwari Sriramadasu via ddas) + + HADOOP-4760. HDFS streams should not throw exceptions when closed twice. + (enis) + +Release 0.19.0 - 2008-11-18 + + INCOMPATIBLE CHANGES + + HADOOP-3595. Remove deprecated methods for mapred.combine.once + functionality, which was necessary to providing backwards + compatible combiner semantics for 0.18. (cdouglas via omalley) + + HADOOP-3667. Remove the following deprecated methods from JobConf: + addInputPath(Path) + getInputPaths() + getMapOutputCompressionType() + getOutputPath() + getSystemDir() + setInputPath(Path) + setMapOutputCompressionType(CompressionType style) + setOutputPath(Path) + (Amareshwari Sriramadasu via omalley) + + HADOOP-3652. Remove deprecated class OutputFormatBase. + (Amareshwari Sriramadasu via cdouglas) + + HADOOP-2885. Break the hadoop.dfs package into separate packages under + hadoop.hdfs that reflect whether they are client, server, protocol, + etc. DistributedFileSystem and DFSClient have moved and are now + considered package private. (Sanjay Radia via omalley) + + HADOOP-2325. Require Java 6. (cutting) + + HADOOP-372. Add support for multiple input paths with a different + InputFormat and Mapper for each path. (Chris Smith via tomwhite) + + HADOOP-1700. Support appending to file in HDFS. (dhruba) + + HADOOP-3792. Make FsShell -test consistent with unix semantics, returning + zero for true and non-zero for false. (Ben Slusky via cdouglas) + + HADOOP-3664. Remove the deprecated method InputFormat.validateInput, + which is no longer needed. (tomwhite via omalley) + + HADOOP-3549. Give more meaningful errno's in libhdfs. In particular, + EACCES is returned for permission problems. (Ben Slusky via omalley) + + HADOOP-4036. ResourceStatus was added to TaskTrackerStatus by HADOOP-3759, + so increment the InterTrackerProtocol version. (Hemanth Yamijala via + omalley) + + HADOOP-3150. Moves task promotion to tasks. Defines a new interface for + committing output files. Moves job setup to jobclient, and moves jobcleanup + to a separate task. (Amareshwari Sriramadasu via ddas) + + HADOOP-3446. Keep map outputs in memory during the reduce. Remove + fs.inmemory.size.mb and replace with properties defining in memory map + output retention during the shuffle and reduce relative to maximum heap + usage. (cdouglas) + + HADOOP-3245. Adds the feature for supporting JobTracker restart. Running + jobs can be recovered from the history file. The history file format has + been modified to support recovery. The task attempt ID now has the + JobTracker start time to disinguish attempts of the same TIP across + restarts. (Amar Ramesh Kamat via ddas) + + HADOOP-4007. REMOVE DFSFileInfo - FileStatus is sufficient. + (Sanjay Radia via hairong) + + HADOOP-3722. Fixed Hadoop Streaming and Hadoop Pipes to use the Tool + interface and GenericOptionsParser. (Enis Soztutar via acmurthy) + + HADOOP-2816. Cluster summary at name node web reports the space + utilization as: + Configured Capacity: capacity of all the data directories - Reserved space + Present Capacity: Space available for dfs,i.e. remaining+used space + DFS Used%: DFS used space/Present Capacity + (Suresh Srinivas via hairong) + + HADOOP-3938. Disk space quotas for HDFS. This is similar to namespace + quotas in 0.18. (rangadi) + + HADOOP-4293. Make Configuration Writable and remove unreleased + WritableJobConf. Configuration.write is renamed to writeXml. (omalley) + + HADOOP-4281. 
Change dfsadmin to report available disk space in a format + consistent with the web interface as defined in HADOOP-2816. (Suresh + Srinivas via cdouglas) + + HADOOP-4430. Further change the cluster summary at name node web that was + changed in HADOOP-2816: + Non DFS Used - This indicates the disk space taken by non DFS file from + the Configured capacity + DFS Used % - DFS Used % of Configured Capacity + DFS Remaining % - Remaing % Configured Capacity available for DFS use + DFS command line report reflects the same change. Config parameter + dfs.datanode.du.pct is no longer used and is removed from the + hadoop-default.xml. (Suresh Srinivas via hairong) + + HADOOP-4116. Balancer should provide better resource management. (hairong) + + HADOOP-4599. BlocksMap and BlockInfo made package private. (shv) + + NEW FEATURES + + HADOOP-3341. Allow streaming jobs to specify the field separator for map + and reduce input and output. The new configuration values are: + stream.map.input.field.separator + stream.map.output.field.separator + stream.reduce.input.field.separator + stream.reduce.output.field.separator + All of them default to "\t". (Zheng Shao via omalley) + + HADOOP-3479. Defines the configuration file for the resource manager in + Hadoop. You can configure various parameters related to scheduling, such + as queues and queue properties here. The properties for a queue follow a + naming convention,such as, hadoop.rm.queue.queue-name.property-name. + (Hemanth Yamijala via ddas) + + HADOOP-3149. Adds a way in which map/reducetasks can create multiple + outputs. (Alejandro Abdelnur via ddas) + + HADOOP-3714. Add a new contrib, bash-tab-completion, which enables + bash tab completion for the bin/hadoop script. See the README file + in the contrib directory for the installation. (Chris Smith via enis) + + HADOOP-3730. Adds a new JobConf constructor that disables loading + default configurations. (Alejandro Abdelnur via ddas) + + HADOOP-3772. Add a new Hadoop Instrumentation api for the JobTracker and + the TaskTracker, refactor Hadoop Metrics as an implementation of the api. + (Ari Rabkin via acmurthy) + + HADOOP-2302. Provides a comparator for numerical sorting of key fields. + (ddas) + + HADOOP-153. Provides a way to skip bad records. (Sharad Agarwal via ddas) + + HADOOP-657. Free disk space should be modelled and used by the scheduler + to make scheduling decisions. (Ari Rabkin via omalley) + + HADOOP-3719. Initial checkin of Chukwa, which is a data collection and + analysis framework. (Jerome Boulon, Andy Konwinski, Ari Rabkin, + and Eric Yang) + + HADOOP-3873. Add -filelimit and -sizelimit options to distcp to cap the + number of files/bytes copied in a particular run to support incremental + updates and mirroring. (TszWo (Nicholas), SZE via cdouglas) + + HADOOP-3585. FailMon package for hardware failure monitoring and + analysis of anomalies. (Ioannis Koltsidas via dhruba) + + HADOOP-1480. Add counters to the C++ Pipes API. (acmurthy via omalley) + + HADOOP-3854. Add support for pluggable servlet filters in the HttpServers. + (Tsz Wo (Nicholas) Sze via omalley) + + HADOOP-3759. Provides ability to run memory intensive jobs without + affecting other running tasks on the nodes. (Hemanth Yamijala via ddas) + + HADOOP-3746. Add a fair share scheduler. (Matei Zaharia via omalley) + + HADOOP-3754. Add a thrift interface to access HDFS. (dhruba via omalley) + + HADOOP-3828. Provides a way to write skipped records to DFS. + (Sharad Agarwal via ddas) + + HADOOP-3948. 
Separate name-node edits and fsimage directories. + (Lohit Vijayarenu via shv) + + HADOOP-3939. Add an option to DistCp to delete files at the destination + not present at the source. (Tsz Wo (Nicholas) Sze via cdouglas) + + HADOOP-3601. Add a new contrib module for Hive, which is a sql-like + query processing tool that uses map/reduce. (Ashish Thusoo via omalley) + + HADOOP-3866. Added sort and multi-job updates in the JobTracker web ui. + (Craig Weisenfluh via omalley) + + HADOOP-3698. Add access control to control who is allowed to submit or + modify jobs in the JobTracker. (Hemanth Yamijala via omalley) + + HADOOP-1869. Support access times for HDFS files. (dhruba) + + HADOOP-3941. Extend FileSystem API to return file-checksums. + (szetszwo) + + HADOOP-3581. Prevents memory intensive user tasks from taking down + nodes. (Vinod K V via ddas) + + HADOOP-3970. Provides a way to recover counters written to JobHistory. + (Amar Kamat via ddas) + + HADOOP-3702. Adds ChainMapper and ChainReducer classes allow composing + chains of Maps and Reduces in a single Map/Reduce job, something like + MAP+ / REDUCE MAP*. (Alejandro Abdelnur via ddas) + + HADOOP-3445. Add capacity scheduler that provides guaranteed capacities to + queues as a percentage of the cluster. (Vivek Ratan via omalley) + + HADOOP-3992. Add a synthetic load generation facility to the test + directory. (hairong via szetszwo) + + HADOOP-3981. Implement a distributed file checksum algorithm in HDFS + and change DistCp to use file checksum for comparing src and dst files + (szetszwo) + + HADOOP-3829. Narrown down skipped records based on user acceptable value. + (Sharad Agarwal via ddas) + + HADOOP-3930. Add common interfaces for the pluggable schedulers and the + cli & gui clients. (Sreekanth Ramakrishnan via omalley) + + HADOOP-4176. Implement getFileChecksum(Path) in HftpFileSystem. (szetszwo) + + HADOOP-249. Reuse JVMs across Map-Reduce Tasks. + Configuration changes to hadoop-default.xml: + add mapred.job.reuse.jvm.num.tasks + (Devaraj Das via acmurthy) + + HADOOP-4070. Provide a mechanism in Hive for registering UDFs from the + query language. (tomwhite) + + HADOOP-2536. Implement a JDBC based database input and output formats to + allow Map-Reduce applications to work with databases. (Fredrik Hedberg and + Enis Soztutar via acmurthy) + + HADOOP-3019. A new library to support total order partitions. + (cdouglas via omalley) + + HADOOP-3924. Added a 'KILLED' job status. (Subramaniam Krishnan via + acmurthy) + + IMPROVEMENTS + + HADOOP-4205. hive: metastore and ql to use the refactored SerDe library. + (zshao) + + HADOOP-4106. libhdfs: add time, permission and user attribute support + (part 2). (Pete Wyckoff through zshao) + + HADOOP-4104. libhdfs: add time, permission and user attribute support. + (Pete Wyckoff through zshao) + + HADOOP-3908. libhdfs: better error message if llibhdfs.so doesn't exist. + (Pete Wyckoff through zshao) + + HADOOP-3732. Delay intialization of datanode block verification till + the verification thread is started. (rangadi) + + HADOOP-1627. Various small improvements to 'dfsadmin -report' output. + (rangadi) + + HADOOP-3577. Tools to inject blocks into name node and simulated + data nodes for testing. (Sanjay Radia via hairong) + + HADOOP-2664. Add a lzop compatible codec, so that files compressed by lzop + may be processed by map/reduce. (cdouglas via omalley) + + HADOOP-3655. Add additional ant properties to control junit. (Steve + Loughran via omalley) + + HADOOP-3543. 
Update the copyright year to 2008. (cdouglas via omalley) + + HADOOP-3587. Add a unit test for the contrib/data_join framework. + (cdouglas) + + HADOOP-3402. Add terasort example program (omalley) + + HADOOP-3660. Add replication factor for injecting blocks in simulated + datanodes. (Sanjay Radia via cdouglas) + + HADOOP-3684. Add a cloning function to the contrib/data_join framework + permitting users to define a more efficient method for cloning values from + the reduce than serialization/deserialization. (Runping Qi via cdouglas) + + HADOOP-3478. Improves the handling of map output fetching. Now the + randomization is by the hosts (and not the map outputs themselves). + (Jothi Padmanabhan via ddas) + + HADOOP-3617. Removed redundant checks of accounting space in MapTask and + makes the spill thread persistent so as to avoid creating a new one for + each spill. (Chris Douglas via acmurthy) + + HADOOP-3412. Factor the scheduler out of the JobTracker and make + it pluggable. (Tom White and Brice Arnould via omalley) + + HADOOP-3756. Minor. Remove unused dfs.client.buffer.dir from + hadoop-default.xml. (rangadi) + + HADOOP-3747. Adds counter suport for MultipleOutputs. + (Alejandro Abdelnur via ddas) + + HADOOP-3169. LeaseChecker daemon should not be started in DFSClient + constructor. (TszWo (Nicholas), SZE via hairong) + + HADOOP-3824. Move base functionality of StatusHttpServer to a core + package. (TszWo (Nicholas), SZE via cdouglas) + + HADOOP-3646. Add a bzip2 compatible codec, so bzip compressed data + may be processed by map/reduce. (Abdul Qadeer via cdouglas) + + HADOOP-3861. MapFile.Reader and Writer should implement Closeable. + (tomwhite via omalley) + + HADOOP-3791. Introduce generics into ReflectionUtils. (Chris Smith via + cdouglas) + + HADOOP-3694. Improve unit test performance by changing + MiniDFSCluster to listen only on 127.0.0.1. (cutting) + + HADOOP-3620. Namenode should synchronously resolve a datanode's network + location when the datanode registers. (hairong) + + HADOOP-3860. NNThroughputBenchmark is extended with rename and delete + benchmarks. (shv) + + HADOOP-3892. Include unix group name in JobConf. (Matei Zaharia via johan) + + HADOOP-3875. Change the time period between heartbeats to be relative to + the end of the heartbeat rpc, rather than the start. This causes better + behavior if the JobTracker is overloaded. (acmurthy via omalley) + + HADOOP-3853. Move multiple input format (HADOOP-372) extension to + library package. (tomwhite via johan) + + HADOOP-9. Use roulette scheduling for temporary space when the size + is not known. (Ari Rabkin via omalley) + + HADOOP-3202. Use recursive delete rather than FileUtil.fullyDelete. + (Amareshwari Sriramadasu via omalley) + + HADOOP-3368. Remove common-logging.properties from conf. (Steve Loughran + via omalley) + + HADOOP-3851. Fix spelling mistake in FSNamesystemMetrics. (Steve Loughran + via omalley) + + HADOOP-3780. Remove asynchronous resolution of network topology in the + JobTracker (Amar Kamat via omalley) + + HADOOP-3852. Add ShellCommandExecutor.toString method to make nicer + error messages. (Steve Loughran via omalley) + + HADOOP-3844. Include message of local exception in RPC client failures. + (Steve Loughran via omalley) + + HADOOP-3935. Split out inner classes from DataNode.java. (johan) + + HADOOP-3905. Create generic interfaces for edit log streams. (shv) + + HADOOP-3062. Add metrics to DataNode and TaskTracker to record network + traffic for HDFS reads/writes and MR shuffling. 
(cdouglas) + + HADOOP-3742. Remove HDFS from public java doc and add javadoc-dev for + generative javadoc for developers. (Sanjay Radia via omalley) + + HADOOP-3944. Improve documentation for public TupleWritable class in + join package. (Chris Douglas via enis) + + HADOOP-2330. Preallocate HDFS transaction log to improve performance. + (dhruba and hairong) + + HADOOP-3965. Convert DataBlockScanner into a package private class. (shv) + + HADOOP-3488. Prevent hadoop-daemon from rsync'ing log files (Stefan + Groshupf and Craig Macdonald via omalley) + + HADOOP-3342. Change the kill task actions to require http post instead of + get to prevent accidental crawls from triggering it. (enis via omalley) + + HADOOP-3937. Limit the job name in the job history filename to 50 + characters. (Matei Zaharia via omalley) + + HADOOP-3943. Remove unnecessary synchronization in + NetworkTopology.pseudoSortByDistance. (hairong via omalley) + + HADOOP-3498. File globbing alternation should be able to span path + components. (tomwhite) + + HADOOP-3361. Implement renames for NativeS3FileSystem. + (Albert Chern via tomwhite) + + HADOOP-3605. Make EC2 scripts show an error message if AWS_ACCOUNT_ID is + unset. (Al Hoang via tomwhite) + + HADOOP-4147. Remove unused class JobWithTaskContext from class + JobInProgress. (Amareshwari Sriramadasu via johan) + + HADOOP-4151. Add a byte-comparable interface that both Text and + BytesWritable implement. (cdouglas via omalley) + + HADOOP-4174. Move fs image/edit log methods from ClientProtocol to + NamenodeProtocol. (shv via szetszwo) + + HADOOP-4181. Include a .gitignore and saveVersion.sh change to support + developing under git. (omalley) + + HADOOP-4186. Factor LineReader out of LineRecordReader. (tomwhite via + omalley) + + HADOOP-4184. Break the module dependencies between core, hdfs, and + mapred. (tomwhite via omalley) + + HADOOP-4075. test-patch.sh now spits out ant commands that it runs. + (Ramya R via nigel) + + HADOOP-4117. Improve configurability of Hadoop EC2 instances. + (tomwhite) + + HADOOP-2411. Add support for larger CPU EC2 instance types. + (Chris K Wensel via tomwhite) + + HADOOP-4083. Changed the configuration attribute queue.name to + mapred.job.queue.name. (Hemanth Yamijala via acmurthy) + + HADOOP-4194. Added the JobConf and JobID to job-related methods in + JobTrackerInstrumentation for better metrics. (Mac Yang via acmurthy) + + HADOOP-3975. Change test-patch script to report working the dir + modifications preventing the suite from being run. (Ramya R via cdouglas) + + HADOOP-4124. Added a command-line switch to allow users to set job + priorities, also allow it to be manipulated via the web-ui. (Hemanth + Yamijala via acmurthy) + + HADOOP-2165. Augmented JobHistory to include the URIs to the tasks' + userlogs. (Vinod Kumar Vavilapalli via acmurthy) + + HADOOP-4062. Remove the synchronization on the output stream when a + connection is closed and also remove an undesirable exception when + a client is stoped while there is no pending RPC request. (hairong) + + HADOOP-4227. Remove the deprecated class org.apache.hadoop.fs.ShellCommand. + (szetszwo) + + HADOOP-4006. Clean up FSConstants and move some of the constants to + better places. (Sanjay Radia via rangadi) + + HADOOP-4279. Trace the seeds of random sequences in append unit tests to + make itermitant failures reproducible. (szetszwo via cdouglas) + + HADOOP-4209. Remove the change to the format of task attempt id by + incrementing the task attempt numbers by 1000 when the job restarts. 
+ (Amar Kamat via omalley) + + HADOOP-4301. Adds forrest doc for the skip bad records feature. + (Sharad Agarwal via ddas) + + HADOOP-4354. Separate TestDatanodeDeath.testDatanodeDeath() into 4 tests. + (szetszwo) + + HADOOP-3790. Add more unit tests for testing HDFS file append. (szetszwo) + + HADOOP-4321. Include documentation for the capacity scheduler. (Hemanth + Yamijala via omalley) + + HADOOP-4424. Change menu layout for Hadoop documentation (Boris Shkolnik + via cdouglas). + + HADOOP-4438. Update forrest documentation to include missing FsShell + commands. (Suresh Srinivas via cdouglas) + + HADOOP-4105. Add forrest documentation for libhdfs. + (Pete Wyckoff via cutting) + + HADOOP-4510. Make getTaskOutputPath public. (Chris Wensel via omalley) + + OPTIMIZATIONS + + HADOOP-3556. Removed lock contention in MD5Hash by changing the + singleton MessageDigester by an instance per Thread using + ThreadLocal. (Iván de Prado via omalley) + + HADOOP-3328. When client is writing data to DFS, only the last + datanode in the pipeline needs to verify the checksum. Saves around + 30% CPU on intermediate datanodes. (rangadi) + + HADOOP-3863. Use a thread-local string encoder rather than a static one + that is protected by a lock. (acmurthy via omalley) + + HADOOP-3864. Prevent the JobTracker from locking up when a job is being + initialized. (acmurthy via omalley) + + HADOOP-3816. Faster directory listing in KFS. (Sriram Rao via omalley) + + HADOOP-2130. Pipes submit job should have both blocking and non-blocking + versions. (acmurthy via omalley) + + HADOOP-3769. Make the SampleMapper and SampleReducer from + GenericMRLoadGenerator public, so they can be used in other contexts. + (Lingyun Yang via omalley) + + HADOOP-3514. Inline the CRCs in intermediate files as opposed to reading + it from a different .crc file. (Jothi Padmanabhan via ddas) + + HADOOP-3638. Caches the iFile index files in memory to reduce seeks + (Jothi Padmanabhan via ddas) + + HADOOP-4225. FSEditLog.logOpenFile() should persist accessTime + rather than modificationTime. (shv) + + HADOOP-4380. Made several new classes (Child, JVMId, + JobTrackerInstrumentation, QueueManager, ResourceEstimator, + TaskTrackerInstrumentation, and TaskTrackerMetricsInst) in + org.apache.hadoop.mapred package private instead of public. (omalley) + + BUG FIXES + + HADOOP-3563. Refactor the distributed upgrade code so that it is + easier to identify datanode and namenode related code. (dhruba) + + HADOOP-3640. Fix the read method in the NativeS3InputStream. (tomwhite via + omalley) + + HADOOP-3711. Fixes the Streaming input parsing to properly find the + separator. (Amareshwari Sriramadasu via ddas) + + HADOOP-3725. Prevent TestMiniMRMapDebugScript from swallowing exceptions. + (Steve Loughran via cdouglas) + + HADOOP-3726. Throw exceptions from TestCLI setup and teardown instead of + swallowing them. (Steve Loughran via cdouglas) + + HADOOP-3721. Refactor CompositeRecordReader and related mapred.join classes + to make them clearer. (cdouglas) + + HADOOP-3720. Re-read the config file when dfsadmin -refreshNodes is invoked + so dfs.hosts and dfs.hosts.exclude are observed. (lohit vijayarenu via + cdouglas) + + HADOOP-3485. Allow writing to files over fuse. + (Pete Wyckoff via dhruba) + + HADOOP-3723. The flags to the libhdfs.create call can be treated as + a bitmask. (Pete Wyckoff via dhruba) + + HADOOP-3643. Filter out completed tasks when asking for running tasks in + the JobTracker web/ui. (Amar Kamat via omalley) + + HADOOP-3777.
Ensure that Lzo compressors/decompressors correctly handle the + case where native libraries aren't available. (Chris Douglas via acmurthy) + + HADOOP-3728. Fix SleepJob so that it doesn't depend on temporary files, + this ensures we can now run more than one instance of SleepJob + simultaneously. (Chris Douglas via acmurthy) + + HADOOP-3795. Fix saving image files on Namenode with different checkpoint + stamps. (Lohit Vijayarenu via mahadev) + + HADOOP-3624. Improving createeditslog to create tree directory structure. + (Lohit Vijayarenu via mahadev) + + HADOOP-3778. DFSInputStream.seek() did not retry in case of some errors. + (Luo Ning via rangadi) + + HADOOP-3661. The handling of moving files deleted through fuse-dfs to + Trash made similar to the behaviour from dfs shell. + (Pete Wyckoff via dhruba) + + HADOOP-3819. Unset LANG and LC_CTYPE in saveVersion.sh to make it + compatible with non-English locales. (Rong-En Fan via cdouglas) + + HADOOP-3848. Cache calls to getSystemDir in the TaskTracker instead of + calling it for each task start. (acmurthy via omalley) + + HADOOP-3131. Fix reduce progress reporting for compressed intermediate + data. (Matei Zaharia via acmurthy) + + HADOOP-3796. fuse-dfs configuration is implemented as file system + mount options. (Pete Wyckoff via dhruba) + + HADOOP-3836. Fix TestMultipleOutputs to correctly clean up. (Alejandro + Abdelnur via acmurthy) + + HADOOP-3805. Improve fuse-dfs write performance. + (Pete Wyckoff via zshao) + + HADOOP-3846. Fix unit test CreateEditsLog to generate paths correctly. + (Lohit Vjayarenu via cdouglas) + + HADOOP-3904. Fix unit tests using the old dfs package name. + (TszWo (Nicholas), SZE via johan) + + HADOOP-3319. Fix some HOD error messages to go stderr instead of + stdout. (Vinod Kumar Vavilapalli via omalley) + + HADOOP-3907. Move INodeDirectoryWithQuota to its own .java file. + (Tsz Wo (Nicholas), SZE via hairong) + + HADOOP-3919. Fix attribute name in hadoop-default for + mapred.jobtracker.instrumentation. (Ari Rabkin via omalley) + + HADOOP-3903. Change the package name for the servlets to be hdfs instead of + dfs. (Tsz Wo (Nicholas) Sze via omalley) + + HADOOP-3773. Change Pipes to set the default map output key and value + types correctly. (Koji Noguchi via omalley) + + HADOOP-3952. Fix compilation error in TestDataJoin referencing dfs package. + (omalley) + + HADOOP-3951. Fix package name for FSNamesystem logs and modify other + hard-coded Logs to use the class name. (cdouglas) + + HADOOP-3889. Improve error reporting from HftpFileSystem, handling in + DistCp. (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-3946. Fix TestMapRed after hadoop-3664. (tomwhite via omalley) + + HADOOP-3949. Remove duplicate jars from Chukwa. (Jerome Boulon via omalley) + + HADOOP-3933. DataNode sometimes sends up to io.byte.per.checksum bytes + more than required to client. (Ning Li via rangadi) + + HADOOP-3962. Shell command "fs -count" should support paths with different + file systems. (Tsz Wo (Nicholas), SZE via mahadev) + + HADOOP-3957. Fix javac warnings in DistCp and TestCopyFiles. (Tsz Wo + (Nicholas), SZE via cdouglas) + + HADOOP-3958. Fix TestMapRed to check the success of test-job. (omalley via + acmurthy) + + HADOOP-3985. Fix TestHDFSServerPorts to use random ports. (Hairong Kuang + via omalley) + + HADOOP-3964. Fix javadoc warnings introduced by FailMon. (dhruba) + + HADOOP-3785. Fix FileSystem cache to be case-insensitive for scheme and + authority. (Bill de hOra via cdouglas) + + HADOOP-3506. 
Fix a rare NPE caused by error handling in S3. (Tom White via + cdouglas) + + HADOOP-3705. Fix mapred.join parser to accept InputFormats named with + underscore and static, inner classes. (cdouglas) + + HADOOP-4023. Fix javadoc warnings introduced when the HDFS javadoc was + made private. (omalley) + + HADOOP-4030. Remove lzop from the default list of codecs. (Arun Murthy via + cdouglas) + + HADOOP-3961. Fix task disk space requirement estimates for virtual + input jobs. Delays limiting task placement until after 10% of the maps + have finished. (Ari Rabkin via omalley) + + HADOOP-2168. Fix problem with C++ record reader's progress not being + reported to framework. (acmurthy via omalley) + + HADOOP-3966. Copy findbugs generated output files to PATCH_DIR while + running test-patch. (Ramya R via lohit) + + HADOOP-4037. Fix the eclipse plugin for versions of kfs and log4j. (nigel + via omalley) + + HADOOP-3950. Cause the Mini MR cluster to wait for task trackers to + register before continuing. (enis via omalley) + + HADOOP-3910. Remove unused ClusterTestDFSNamespaceLogging and + ClusterTestDFS. (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-3954. Disable record skipping by default. (Sharad Agarwal via + cdouglas) + + HADOOP-4050. Fix TestFairScheduler to use absolute paths for the work + directory. (Matei Zaharia via omalley) + + HADOOP-4069. Keep temporary test files from TestKosmosFileSystem under + test.build.data instead of /tmp. (lohit via omalley) + + HADOOP-4078. Create test files for TestKosmosFileSystem in separate + directory under test.build.data. (lohit) + + HADOOP-3968. Fix getFileBlockLocations calls to use FileStatus instead + of Path reflecting the new API. (Pete Wyckoff via lohit) + + HADOOP-3963. libhdfs does not exit on its own, instead it returns error + to the caller and behaves as a true library. (Pete Wyckoff via dhruba) + + HADOOP-4100. Removes the cleanupTask scheduling from the Scheduler + implementations and moves it to the JobTracker. + (Amareshwari Sriramadasu via ddas) + + HADOOP-4097. Make hive work well with speculative execution turned on. + (Joydeep Sen Sarma via dhruba) + + HADOOP-4113. Changes to libhdfs to not exit on its own, rather return + an error code to the caller. (Pete Wyckoff via dhruba) + + HADOOP-4054. Remove duplicate lease removal during edit log loading. + (hairong) + + HADOOP-4071. FSNameSystem.isReplicationInProgress should add an + underReplicated block to the neededReplication queue using method + "add" not "update". (hairong) + + HADOOP-4154. Fix type warnings in WritableUtils. (szetszwo via omalley) + + HADOOP-4133. Log files generated by Hive should reside in the + build directory. (Prasad Chakka via dhruba) + + HADOOP-4094. Hive now has hive-default.xml and hive-site.xml similar + to core hadoop. (Prasad Chakka via dhruba) + + HADOOP-4112. Handles cleanupTask in JobHistory + (Amareshwari Sriramadasu via ddas) + + HADOOP-3831. Very slow reading clients sometimes failed while reading. + (rangadi) + + HADOOP-4155. Use JobTracker's start time while initializing JobHistory's + JobTracker Unique String. (lohit) + + HADOOP-4099. Fix null pointer when using HFTP from an 0.18 server. + (dhruba via omalley) + + HADOOP-3570. Includes user specified libjar files in the client side + classpath path. (Sharad Agarwal via ddas) + + HADOOP-4129. Changed memory limits of TaskTracker and Tasks to be in + KiloBytes rather than bytes. (Vinod Kumar Vavilapalli via acmurthy) + + HADOOP-4139. Optimize Hive multi group-by. 
+ (Namin Jain via dhruba) + + HADOOP-3911. Add a check to fsck options to make sure -files is not + the first option to resolve conflicts with GenericOptionsParser + (lohit) + + HADOOP-3623. Refactor LeaseManager. (szetszwo) + + HADOOP-4125. Handles Reduce cleanup tip on the web ui. + (Amareshwari Sriramadasu via ddas) + + HADOOP-4087. Hive Metastore API for php and python clients. + (Prasad Chakka via dhruba) + + HADOOP-4197. Update DATA_TRANSFER_VERSION for HADOOP-3981. (szetszwo) + + HADOOP-4138. Refactor the Hive SerDe library to better structure + the interfaces to the serializer and de-serializer. + (Zheng Shao via dhruba) + + HADOOP-4195. Close compressor before returning to codec pool. + (acmurthy via omalley) + + HADOOP-2403. Escapes some special characters before logging to + history files. (Amareshwari Sriramadasu via ddas) + + HADOOP-4200. Fix a bug in the test-patch.sh script. + (Ramya R via nigel) + + HADOOP-4084. Add explain plan capabilities to Hive Query Language. + (Ashish Thusoo via dhruba) + + HADOOP-4121. Preserve cause for exception if the initialization of + HistoryViewer for JobHistory fails. (Amareshwari Sri Ramadasu via + acmurthy) + + HADOOP-4213. Fixes NPE in TestLimitTasksPerJobTaskScheduler. + (Sreekanth Ramakrishnan via ddas) + + HADOOP-4077. Setting access and modification time for a file + requires write permissions on the file. (dhruba) + + HADOOP-3592. Fix a couple of possible file leaks in FileUtil + (Bill de hOra via rangadi) + + HADOOP-4120. Hive interactive shell records the time taken by a + query. (Raghotham Murthy via dhruba) + + HADOOP-4090. The hive scripts pick up hadoop from HADOOP_HOME + and then the path. (Raghotham Murthy via dhruba) + + HADOOP-4242. Remove extra ";" in FSDirectory that blocks compilation + in some IDE's. (szetszwo via omalley) + + HADOOP-4249. Fix eclipse path to include the hsqldb.jar. (szetszwo via + omalley) + + HADOOP-4247. Move InputSampler into org.apache.hadoop.mapred.lib, so that + examples.jar doesn't depend on tools.jar. (omalley) + + HADOOP-4269. Fix the deprecation of LineReader by extending the new class + into the old name and deprecating it. Also update the tests to test the + new class. (cdouglas via omalley) + + HADOOP-4280. Fix conversions between seconds in C and milliseconds in + Java for access times for files. (Pete Wyckoff via rangadi) + + HADOOP-4254. -setSpaceQuota command does not convert "TB" extenstion to + terabytes properly. Implementation now uses StringUtils for parsing this. + (Raghu Angadi) + + HADOOP-4259. Findbugs should run over tools.jar also. (cdouglas via + omalley) + + HADOOP-4275. Move public method isJobValidName from JobID to a private + method in JobTracker. (omalley) + + HADOOP-4173. fix failures in TestProcfsBasedProcessTree and + TestTaskTrackerMemoryManager tests. ProcfsBasedProcessTree and + memory management in TaskTracker are disabled on Windows. + (Vinod K V via rangadi) + + HADOOP-4189. Fixes the history blocksize & intertracker protocol version + issues introduced as part of HADOOP-3245. (Amar Kamat via ddas) + + HADOOP-4190. Fixes the backward compatibility issue with Job History. + introduced by HADOOP-3245 and HADOOP-2403. (Amar Kamat via ddas) + + HADOOP-4237. Fixes the TestStreamingBadRecords.testNarrowDown testcase. + (Sharad Agarwal via ddas) + + HADOOP-4274. Capacity scheduler accidently modifies the underlying + data structures when browing the job lists. (Hemanth Yamijala via omalley) + + HADOOP-4309. Fix eclipse-plugin compilation. (cdouglas) + + HADOOP-4232. 
Fix race condition in JVM reuse when multiple slots become + free. (ddas via acmurthy) + + HADOOP-4302. Fix a race condition in TestReduceFetch that can yield false + negatvies. (cdouglas) + + HADOOP-3942. Update distcp documentation to include features introduced in + HADOOP-3873, HADOOP-3939. (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-4319. fuse-dfs dfs_read function returns as many bytes as it is + told to read unlesss end-of-file is reached. (Pete Wyckoff via dhruba) + + HADOOP-4246. Ensure we have the correct lower bound on the number of + retries for fetching map-outputs; also fixed the case where the reducer + automatically kills on too many unique map-outputs could not be fetched + for small jobs. (Amareshwari Sri Ramadasu via acmurthy) + + HADOOP-4163. Report FSErrors from map output fetch threads instead of + merely logging them. (Sharad Agarwal via cdouglas) + + HADOOP-4261. Adds a setup task for jobs. This is required so that we + don't setup jobs that haven't been inited yet (since init could lead + to job failure). Only after the init has successfully happened do we + launch the setupJob task. (Amareshwari Sriramadasu via ddas) + + HADOOP-4256. Removes Completed and Failed Job tables from + jobqueue_details.jsp. (Sreekanth Ramakrishnan via ddas) + + HADOOP-4267. Occasional exceptions during shutting down HSQLDB is logged + but not rethrown. (enis) + + HADOOP-4018. The number of tasks for a single job cannot exceed a + pre-configured maximum value. (dhruba) + + HADOOP-4288. Fixes a NPE problem in CapacityScheduler. + (Amar Kamat via ddas) + + HADOOP-4014. Create hard links with 'fsutil hardlink' on Windows. (shv) + + HADOOP-4393. Merged org.apache.hadoop.fs.permission.AccessControlException + and org.apache.hadoop.security.AccessControlIOException into a single + class hadoop.security.AccessControlException. (omalley via acmurthy) + + HADOOP-4287. Fixes an issue to do with maintaining counts of running/pending + maps/reduces. (Sreekanth Ramakrishnan via ddas) + + HADOOP-4361. Makes sure that jobs killed from command line are killed + fast (i.e., there is a slot to run the cleanup task soon). + (Amareshwari Sriramadasu via ddas) + + HADOOP-4400. Add "hdfs://" to fs.default.name on quickstart.html. + (Jeff Hammerbacher via omalley) + + HADOOP-4378. Fix TestJobQueueInformation to use SleepJob rather than + WordCount via TestMiniMRWithDFS. (Sreekanth Ramakrishnan via acmurthy) + + HADOOP-4376. Fix formatting in hadoop-default.xml for + hadoop.http.filter.initializers. (Enis Soztutar via acmurthy) + + HADOOP-4410. Adds an extra arg to the API FileUtil.makeShellPath to + determine whether to canonicalize file paths or not. + (Amareshwari Sriramadasu via ddas) + + HADOOP-4236. Ensure un-initialized jobs are killed correctly on + user-demand. (Sharad Agarwal via acmurthy) + + HADOOP-4373. Fix calculation of Guaranteed Capacity for the + capacity-scheduler. (Hemanth Yamijala via acmurthy) + + HADOOP-4053. Schedulers must be notified when jobs complete. (Amar Kamat via omalley) + + HADOOP-4335. Fix FsShell -ls for filesystems without owners/groups. (David + Phillips via cdouglas) + + HADOOP-4426. TestCapacityScheduler broke due to the two commits HADOOP-4053 + and HADOOP-4373. This patch fixes that. (Hemanth Yamijala via ddas) + + HADOOP-4418. Updates documentation in forrest for Mapred, streaming and pipes. + (Amareshwari Sriramadasu via ddas) + + HADOOP-3155. Ensure that there is only one thread fetching + TaskCompletionEvents on TaskTracker re-init. 
(Dhruba Borthakur via + acmurthy) + + HADOOP-4425. Fix EditLogInputStream to overload the bulk read method. + (cdouglas) + + HADOOP-4427. Adds the new queue/job commands to the manual. + (Sreekanth Ramakrishnan via ddas) + + HADOOP-4278. Increase debug logging for unit test TestDatanodeDeath. + Fix the case when primary is dead. (dhruba via szetszwo) + + HADOOP-4423. Keep block length when the block recovery is triggered by + append. (szetszwo) + + HADOOP-4449. Fix dfsadmin usage. (Raghu Angadi via cdouglas) + + HADOOP-4455. Added TestSerDe so that unit tests can run successfully. + (Ashish Thusoo via dhruba) + + HADOOP-4457. Fixes an input split logging problem introduced by + HADOOP-3245. (Amareshwari Sriramadasu via ddas) + + HADOOP-4464. Separate out TestFileCreationClient from TestFileCreation. + (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-4404. saveFSImage() removes files from a storage directory that do + not correspond to its type. (shv) + + HADOOP-4149. Fix handling of updates to the job priority, by changing the + list of jobs to be keyed by the priority, submit time, and job tracker id. + (Amar Kamat via omalley) + + HADOOP-4296. Fix job client failures by not retiring a job as soon as it + is finished. (dhruba) + + HADOOP-4439. Remove configuration variables that aren't usable yet, in + particular mapred.tasktracker.tasks.maxmemory and mapred.task.max.memory. + (Hemanth Yamijala via omalley) + + HADOOP-4230. Fix for serde2 interface, limit operator, select * operator, + UDF trim functions and sampling. (Ashish Thusoo via dhruba) + + HADOOP-4358. No need to truncate access time in INode. Also fixes NPE + in CreateEditsLog. (Raghu Angadi) + + HADOOP-4387. TestHDFSFileSystemContract fails on windows nightly builds. + (Raghu Angadi) + + HADOOP-4466. Ensure that SequenceFileOutputFormat isn't tied to Writables + and can be used with other Serialization frameworks. (Chris Wensel via + acmurthy) + + HADOOP-4525. Fix ipc.server.ipcnodelay originally missed in in HADOOP-2232. + (cdouglas via Clint Morgan) + + HADOOP-4498. Ensure that JobHistory correctly escapes the job name so that + regex patterns work. (Chris Wensel via acmurthy) + + HADOOP-4446. Modify guaranteed capacity labels in capacity scheduler's UI + to reflect the information being displayed. (Sreekanth Ramakrishnan via + yhemanth) + + HADOOP-4282. Some user facing URLs are not filtered by user filters. + (szetszwo) + + HADOOP-4595. Fixes two race conditions - one to do with updating free slot count, + and another to do with starting the MapEventsFetcher thread. (ddas) + + HADOOP-4552. Fix a deadlock in RPC server. (Raghu Angadi) + + HADOOP-4471. Sort running jobs by priority in the capacity scheduler. + (Amar Kamat via yhemanth) + + HADOOP-4500. Fix MultiFileSplit to get the FileSystem from the relevant + path rather than the JobClient. (Joydeep Sen Sarma via cdouglas) + +Release 0.18.4 - Unreleased + + BUG FIXES + + HADOOP-5114. Remove timeout for accept() in DataNode. This makes accept() + fail in JDK on Windows and causes many tests to fail. (Raghu Angadi) + + HADOOP-5192. Block receiver should not remove a block that's created or + being written by other threads. (hairong) + + HADOOP-5134. FSNamesystem#commitBlockSynchronization adds under-construction + block locations to blocksMap. (Dhruba Borthakur via hairong) + + HADOOP-5412. Simulated DataNode should not write to a block that's being + written by another thread. (hairong) + + HADOOP-5465. 
Fix the problem of blocks remaining under-replicated by + providing synchronized modification to the counter xmitsInProgress in + DataNode. (hairong) + + HADOOP-5557. Fixes some minor problems in TestOverReplicatedBlocks. + (szetszwo) + + HADOOP-5644. Namenode is stuck in safe mode. (suresh Srinivas via hairong) + + HADOOP-6017. Lease Manager in NameNode does not handle certain characters + in filenames. This results in fatal errors in Secondary NameNode and while + restrating NameNode. (Tsz Wo (Nicholas), SZE via rangadi) + +Release 0.18.3 - 2009-01-27 + + IMPROVEMENTS + + HADOOP-4150. Include librecordio in hadoop releases. (Giridharan Kesavan + via acmurthy) + + HADOOP-4668. Improve documentation for setCombinerClass to clarify the + restrictions on combiners. (omalley) + + BUG FIXES + + HADOOP-4499. DFSClient should invoke checksumOk only once. (Raghu Angadi) + + HADOOP-4597. Calculate mis-replicated blocks when safe-mode is turned + off manually. (shv) + + HADOOP-3121. lsr should keep listing the remaining items but not + terminate if there is any IOException. (szetszwo) + + HADOOP-4610. Always calculate mis-replicated blocks when safe-mode is + turned off. (shv) + + HADOOP-3883. Limit namenode to assign at most one generation stamp for + a particular block within a short period. (szetszwo) + + HADOOP-4556. Block went missing. (hairong) + + HADOOP-4643. NameNode should exclude excessive replicas when counting + live replicas for a block. (hairong) + + HADOOP-4703. Should not wait for proxy forever in lease recovering. + (szetszwo) + + HADOOP-4647. NamenodeFsck should close the DFSClient it has created. + (szetszwo) + + HADOOP-4616. Fuse-dfs can handle bad values from FileSystem.read call. + (Pete Wyckoff via dhruba) + + HADOOP-4061. Throttle Datanode decommission monitoring in Namenode. + (szetszwo) + + HADOOP-4659. Root cause of connection failure is being lost to code that + uses it for delaying startup. (Steve Loughran and Hairong via hairong) + + HADOOP-4614. Lazily open segments when merging map spills to avoid using + too many file descriptors. (Yuri Pradkin via cdouglas) + + HADOOP-4257. The DFS client should pick only one datanode as the candidate + to initiate lease recovery. (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-4713. Fix librecordio to handle records larger than 64k. (Christian + Kunz via cdouglas) + + HADOOP-4635. Fix a memory leak in fuse dfs. (pete wyckoff via mahadev) + + HADOOP-4714. Report status between merges and make the number of records + between progress reports configurable. (Jothi Padmanabhan via cdouglas) + + HADOOP-4726. Fix documentation typos "the the". (Edward J. Yoon via + szetszwo) + + HADOOP-4679. Datanode prints tons of log messages: waiting for threadgroup + to exit, active threads is XX. (hairong) + + HADOOP-4746. Job output directory should be normalized. (hairong) + + HADOOP-4717. Removal of default port# in NameNode.getUri() causes a + map/reduce job failed to prompt temporary output. (hairong) + + HADOOP-4778. Check for zero size block meta file when updating a block. + (szetszwo) + + HADOOP-4742. Replica gets deleted by mistake. (Wang Xu via hairong) + + HADOOP-4702. Failed block replication leaves an incomplete block in + receiver's tmp data directory. (hairong) + + HADOOP-4613. Fix block browsing on Web UI. (Johan Oskarsson via shv) + + HADOOP-4806. HDFS rename should not use src path as a regular expression. + (szetszwo) + + HADOOP-4795. 
Prevent lease monitor getting into an infinite loop when + leases and the namespace tree does not match. (szetszwo) + + HADOOP-4620. Fixes Streaming to handle well the cases of map/reduce with empty + input/output. (Ravi Gummadi via ddas) + + HADOOP-4857. Fixes TestUlimit to have exactly 1 map in the jobs spawned. + (Ravi Gummadi via ddas) + + HADOOP-4810. Data lost at cluster startup time. (hairong) + + HADOOP-4797. Improve how RPC server reads and writes large buffers. Avoids + soft-leak of direct buffers and excess copies in NIO layer. (Raghu Angadi) + + HADOOP-4840. TestNodeCount sometimes fails with NullPointerException. + (hairong) + + HADOOP-4904. Fix deadlock while leaving safe mode. (shv) + + HADOOP-1980. 'dfsadmin -safemode enter' should prevent the namenode from + leaving safemode automatically. (shv & Raghu Angadi) + + HADOOP-4951. Lease monitor should acquire the LeaseManager lock but not the + Monitor lock. (szetszwo) + + HADOOP-4935. processMisReplicatedBlocks() should not clear + excessReplicateMap. (shv) + + HADOOP-4961. Fix ConcurrentModificationException in lease recovery + of empty files. (shv) + + HADOOP-4971. A long (unexpected) delay at datanodes could make subsequent + block reports from many datanode at the same time. (Raghu Angadi) + + HADOOP-4910. NameNode should exclude replicas when choosing excessive + replicas to delete to avoid data lose. (hairong) + + HADOOP-4983. Fixes a problem in updating Counters in the status reporting. + (Amareshwari Sriramadasu via ddas) + +Release 0.18.2 - 2008-11-03 + + BUG FIXES + + HADOOP-3614. Fix a bug that Datanode may use an old GenerationStamp to get + meta file. (szetszwo) + + HADOOP-4314. Simulated datanodes should not include blocks that are still + being written in their block report. (Raghu Angadi) + + HADOOP-4228. dfs datanode metrics, bytes_read and bytes_written, overflow + due to incorrect type used. (hairong) + + HADOOP-4395. The FSEditLog loading is incorrect for the case OP_SET_OWNER. + (szetszwo) + + HADOOP-4351. FSNamesystem.getBlockLocationsInternal throws + ArrayIndexOutOfBoundsException. (hairong) + + HADOOP-4403. Make TestLeaseRecovery and TestFileCreation more robust. + (szetszwo) + + HADOOP-4292. Do not support append() for LocalFileSystem. (hairong) + + HADOOP-4399. Make fuse-dfs multi-thread access safe. + (Pete Wyckoff via dhruba) + + HADOOP-4369. Use setMetric(...) instead of incrMetric(...) for metrics + averages. (Brian Bockelman via szetszwo) + + HADOOP-4469. Rename and add the ant task jar file to the tar file. (nigel) + + HADOOP-3914. DFSClient sends Checksum Ok only once for a block. + (Christian Kunz via hairong) + + HADOOP-4467. SerializationFactory now uses the current context ClassLoader + allowing for user supplied Serialization instances. (Chris Wensel via + acmurthy) + + HADOOP-4517. Release FSDataset lock before joining ongoing create threads. + (szetszwo) + + HADOOP-4526. fsck failing with NullPointerException. (hairong) + + HADOOP-4483 Honor the max parameter in DatanodeDescriptor.getBlockArray(..) + (Ahad Rana and Hairong Kuang via szetszwo) + + HADOOP-4340. Correctly set the exit code from JobShell.main so that the + 'hadoop jar' command returns the right code to the user. (acmurthy) + + NEW FEATURES + + HADOOP-2421. Add jdiff output to documentation, listing all API + changes from the prior release. (cutting) + +Release 0.18.1 - 2008-09-17 + + IMPROVEMENTS + + HADOOP-3934. Upgrade log4j to 1.2.15. (omalley) + + BUG FIXES + + HADOOP-3995. 
In case of quota failure on HDFS, rename does not restore + source filename. (rangadi) + + HADOOP-3821. Prevent SequenceFile and IFile from duplicating codecs in + CodecPool when closed more than once. (Arun Murthy via cdouglas) + + HADOOP-4040. Remove coded default of the IPC idle connection timeout + from the TaskTracker, which was causing HDFS client connections to not be + collected. (ddas via omalley) + + HADOOP-4046. Made WritableComparable's constructor protected instead of + private to re-enable class derivation. (cdouglas via omalley) + + HADOOP-3940. Fix in-memory merge condition to wait when there are no map + outputs or when the final map outputs are being fetched without contention. + (cdouglas) + +Release 0.18.0 - 2008-08-19 + + INCOMPATIBLE CHANGES + + HADOOP-2703. The default options to fsck skips checking files + that are being written to. The output of fsck is incompatible + with previous release. (lohit vijayarenu via dhruba) + + HADOOP-2865. FsShell.ls() printout format changed to print file names + in the end of the line. (Edward J. Yoon via shv) + + HADOOP-3283. The Datanode has a RPC server. It currently supports + two RPCs: the first RPC retrives the metadata about a block and the + second RPC sets the generation stamp of an existing block. + (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2797. Code related to upgrading to 0.14 (Block CRCs) is + removed. As result, upgrade to 0.18 or later from 0.13 or earlier + is not supported. If upgrading from 0.13 or earlier is required, + please upgrade to an intermediate version (0.14-0.17) and then + to this version. (rangadi) + + HADOOP-544. This issue introduces new classes JobID, TaskID and + TaskAttemptID, which should be used instead of their string counterparts. + Functions in JobClient, TaskReport, RunningJob, jobcontrol.Job and + TaskCompletionEvent that use string arguments are deprecated in favor + of the corresponding ones that use ID objects. Applications can use + xxxID.toString() and xxxID.forName() methods to convert/restore objects + to/from strings. (Enis Soztutar via ddas) + + HADOOP-2188. RPC client sends a ping rather than throw timeouts. + RPC server does not throw away old RPCs. If clients and the server are on + different versions, they are not able to function well. In addition, + The property ipc.client.timeout is removed from the default hadoop + configuration. It also removes metrics RpcOpsDiscardedOPsNum. (hairong) + + HADOOP-2181. This issue adds logging for input splits in Jobtracker log + and jobHistory log. Also adds web UI for viewing input splits in job UI + and history UI. (Amareshwari Sriramadasu via ddas) + + HADOOP-3226. Run combiners multiple times over map outputs as they + are merged in both the map and the reduce tasks. (cdouglas via omalley) + + HADOOP-3329. DatanodeDescriptor objects should not be stored in the + fsimage. (dhruba) + + HADOOP-2656. The Block object has a generation stamp inside it. + Existing blocks get a generation stamp of 0. This is needed to support + appends. (dhruba) + + HADOOP-3390. Removed deprecated ClientProtocol.abandonFileInProgress(). + (Tsz Wo (Nicholas), SZE via rangadi) + + HADOOP-3405. Made some map/reduce internal classes non-public: + MapTaskStatus, ReduceTaskStatus, JobSubmissionProtocol, + CompletedJobStatusStore. (enis via omaley) + + HADOOP-3265. Removed depcrecated API getFileCacheHints(). + (Lohit Vijayarenu via rangadi) + + HADOOP-3310. The namenode instructs the primary datanode to do lease + recovery. 
The block gets a new generation stamp. + (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2909. Improve IPC idle connection management. Property + ipc.client.maxidletime is removed from the default configuration, + instead it is defined as twice of the ipc.client.connection.maxidletime. + A connection with outstanding requests won't be treated as idle. + (hairong) + + HADOOP-3459. Change in the output format of dfs -ls to more closely match + /bin/ls. New format is: perm repl owner group size date name + (Mukund Madhugiri via omally) + + HADOOP-3113. An fsync invoked on a HDFS file really really + persists data! The datanode moves blocks in the tmp directory to + the real block directory on a datanode-restart. (dhruba) + + HADOOP-3452. Change fsck to return non-zero status for a corrupt + FileSystem. (lohit vijayarenu via cdouglas) + + HADOOP-3193. Include the address of the client that found the corrupted + block in the log. Also include a CorruptedBlocks metric to track the size + of the corrupted block map. (cdouglas) + + HADOOP-3512. Separate out the tools into a tools jar. (omalley) + + HADOOP-3598. Ensure that temporary task-output directories are not created + if they are not necessary e.g. for Maps with no side-effect files. + (acmurthy) + + HADOOP-3665. Modify WritableComparator so that it only creates instances + of the keytype if the type does not define a WritableComparator. Calling + the superclass compare will throw a NullPointerException. Also define + a RawComparator for NullWritable and permit it to be written as a key + to SequenceFiles. (cdouglas) + + HADOOP-3673. Avoid deadlock caused by DataNode RPC receoverBlock(). + (Tsz Wo (Nicholas), SZE via rangadi) + + NEW FEATURES + + HADOOP-3074. Provides a UrlStreamHandler for DFS and other FS, + relying on FileSystem (taton) + + HADOOP-2585. Name-node imports namespace data from a recent checkpoint + accessible via a NFS mount. (shv) + + HADOOP-3061. Writable types for doubles and bytes. (Andrzej + Bialecki via omalley) + + HADOOP-2857. Allow libhdfs to set jvm options. (Craig Macdonald + via omalley) + + HADOOP-3317. Add default port for HDFS namenode. The port in + "hdfs:" URIs now defaults to 8020, so that one may simply use URIs + of the form "hdfs://example.com/dir/file". (cutting) + + HADOOP-2019. Adds support for .tar, .tgz and .tar.gz files in + DistributedCache (Amareshwari Sriramadasu via ddas) + + HADOOP-3058. Add FSNamesystem status metrics. + (Lohit Vjayarenu via rangadi) + + HADOOP-1915. Allow users to specify counters via strings instead + of enumerations. (tomwhite via omalley) + + HADOOP-2065. Delay invalidating corrupt replicas of block until its + is removed from under replicated state. If all replicas are found to + be corrupt, retain all copies and mark the block as corrupt. + (Lohit Vjayarenu via rangadi) + + HADOOP-3221. Adds org.apache.hadoop.mapred.lib.NLineInputFormat, which + splits files into splits each of N lines. N can be specified by + configuration property "mapred.line.input.format.linespermap", which + defaults to 1. (Amareshwari Sriramadasu via ddas) + + HADOOP-3336. Direct a subset of annotated FSNamesystem calls for audit + logging. (cdouglas) + + HADOOP-3400. A new API FileSystem.deleteOnExit() that facilitates + handling of temporary files in HDFS. (dhruba) + + HADOOP-4. Add fuse-dfs to contrib, permitting one to mount an + HDFS filesystem on systems that support FUSE, e.g., Linux. + (Pete Wyckoff via cutting) + + HADOOP-3246. Add FTPFileSystem. (Ankur Goel via cutting) + + HADOOP-3250. 
Extend FileSystem API to allow appending to files. + (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-3177. Implement Syncable interface for FileSystem. + (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-1328. Implement user counters in streaming. (tomwhite via + omalley) + + HADOOP-3187. Quotas for namespace management. (Hairong Kuang via ddas) + + HADOOP-3307. Support for Archives in Hadoop. (Mahadev Konar via ddas) + + HADOOP-3460. Add SequenceFileAsBinaryOutputFormat to permit direct + writes of serialized data. (Koji Noguchi via cdouglas) + + HADOOP-3230. Add ability to get counter values from command + line. (tomwhite via omalley) + + HADOOP-930. Add support for native S3 files. (tomwhite via cutting) + + HADOOP-3502. Quota API needs documentation in Forrest. (hairong) + + HADOOP-3413. Allow SequenceFile.Reader to use serialization + framework. (tomwhite via omalley) + + HADOOP-3541. Import of the namespace from a checkpoint documented + in hadoop user guide. (shv) + + IMPROVEMENTS + + HADOOP-3677. Simplify generation stamp upgrade by making it a + local upgrade on datanodes. Deleted distributed upgrade. + (rangadi) + + HADOOP-2928. Remove deprecated FileSystem.getContentLength(). + (Lohit Vijayarenu via rangadi) + + HADOOP-3130. Make the connect timeout smaller for getFile. + (Amar Ramesh Kamat via ddas) + + HADOOP-3160. Remove deprecated exists() from ClientProtocol and + FSNamesystem. (Lohit Vijayarenu via rangadi) + + HADOOP-2910. Throttle IPC Clients during bursts of requests or + server slowdown. Clients retry connection for up to 15 minutes + when socket connection times out. (hairong) + + HADOOP-3295. Allow TextOutputFormat to use configurable separators. + (Zheng Shao via cdouglas). + + HADOOP-3308. Improve QuickSort by excluding values equal to the pivot from the + partition. (cdouglas) + + HADOOP-2461. Trim property names in configuration. + (Tsz Wo (Nicholas), SZE via shv) + + HADOOP-2799. Deprecate o.a.h.io.Closeable in favor of java.io.Closeable. + (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-3345. Enhance the hudson-test-patch target to cleanup messages, + fix minor defects, and add eclipse plugin and python unit tests. (nigel) + + HADOOP-3144. Improve robustness of LineRecordReader by defining a maximum + line length (mapred.linerecordreader.maxlength), thereby avoiding reading + too far into the following split. (Zheng Shao via cdouglas) + + HADOOP-3334. Move lease handling from FSNamesystem into a separate class. + (Tsz Wo (Nicholas), SZE via rangadi) + + HADOOP-3332. Reduces the amount of logging in Reducer's shuffle phase. + (Devaraj Das) + + HADOOP-3355. Enhances Configuration class to accept hex numbers for getInt + and getLong. (Amareshwari Sriramadasu via ddas) + + HADOOP-3350. Add an argument to distcp to permit the user to limit the + number of maps. (cdouglas) + + HADOOP-3013. Add corrupt block reporting to fsck. + (lohit vijayarenu via cdouglas) + + HADOOP-3377. Remove TaskRunner::replaceAll and replace with equivalent + String::replace. (Brice Arnould via cdouglas) + + HADOOP-3398. Minor improvement to a utility function that participates + in backoff calculation. (cdouglas) + + HADOOP-3381. Clear references when directories are deleted so that + the effect of memory leaks is not multiplied. (rangadi) + + HADOOP-2867. Adds the task's CWD to its LD_LIBRARY_PATH. + (Amareshwari Sriramadasu via ddas) + + HADOOP-3232. DU class runs the 'du' command in a separate thread so + that it does not block the user. DataNode misses heartbeats in large + nodes otherwise. 
(Johan Oskarsson via rangadi) + + HADOOP-3035. During block transfers between datanodes, the receiving + datanode, now can report corrupt replicas received from src node to + the namenode. (Lohit Vijayarenu via rangadi) + + HADOOP-3434. Retain the cause of the bind failure in Server::bind. + (Steve Loughran via cdouglas) + + HADOOP-3429. Increases the size of the buffers used for the communication + for Streaming jobs. (Amareshwari Sriramadasu via ddas) + + HADOOP-3486. Change default for initial block report to 0 seconds + and document it. (Sanjay Radia via omalley) + + HADOOP-3448. Improve the text in the assertion making sure the + layout versions are consistent in the data node. (Steve Loughran + via omalley) + + HADOOP-2095. Improve the Map-Reduce shuffle/merge by cutting down + buffer-copies; changed intermediate sort/merge to use the new IFile format + rather than SequenceFiles and compression of map-outputs is now + implemented by compressing the entire file rather than SequenceFile + compression. Shuffle also has been changed to use a simple byte-buffer + manager rather than the InMemoryFileSystem. + Configuration changes to hadoop-default.xml: + deprecated mapred.map.output.compression.type + (acmurthy) + + HADOOP-236. JobTacker now refuses connection from a task tracker with a + different version number. (Sharad Agarwal via ddas) + + HADOOP-3427. Improves the shuffle scheduler. It now waits for notifications + from shuffle threads when it has scheduled enough, before scheduling more. + (ddas) + + HADOOP-2393. Moves the handling of dir deletions in the tasktracker to + a separate thread. (Amareshwari Sriramadasu via ddas) + + HADOOP-3501. Deprecate InMemoryFileSystem. (cutting via omalley) + + HADOOP-3366. Stall the shuffle while in-memory merge is in progress. + (acmurthy) + + HADOOP-2916. Refactor src structure, but leave package structure alone. + (Raghu Angadi via mukund) + + HADOOP-3492. Add forrest documentation for user archives. + (Mahadev Konar via hairong) + + HADOOP-3467. Improve documentation for FileSystem::deleteOnExit. + (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-3379. Documents stream.non.zero.exit.status.is.failure for Streaming. + (Amareshwari Sriramadasu via ddas) + + HADOOP-3096. Improves documentation about the Task Execution Environment in + the Map-Reduce tutorial. (Amareshwari Sriramadasu via ddas) + + HADOOP-2984. Add forrest documentation for DistCp. (cdouglas) + + HADOOP-3406. Add forrest documentation for Profiling. + (Amareshwari Sriramadasu via ddas) + + HADOOP-2762. Add forrest documentation for controls of memory limits on + hadoop daemons and Map-Reduce tasks. (Amareshwari Sriramadasu via ddas) + + HADOOP-3535. Fix documentation and name of IOUtils.close to + reflect that it should only be used in cleanup contexts. (omalley) + + HADOOP-3593. Updates the mapred tutorial. (ddas) + + HADOOP-3547. Documents the way in which native libraries can be distributed + via the DistributedCache. (Amareshwari Sriramadasu via ddas) + + HADOOP-3606. Updates the Streaming doc. (Amareshwari Sriramadasu via ddas) + + HADOOP-3532. Add jdiff reports to the build scripts. (omalley) + + HADOOP-3100. Develop tests to test the DFS command line interface. (mukund) + + HADOOP-3688. Fix up HDFS docs. (Robert Chansler via hairong) + + OPTIMIZATIONS + + HADOOP-3274. The default constructor of BytesWritable creates empty + byte array. (Tsz Wo (Nicholas), SZE via shv) + + HADOOP-3272. Remove redundant copy of Block object in BlocksMap. 
+ (Lohit Vjayarenu via shv) + + HADOOP-3164. Reduce DataNode CPU usage by using FileChannel.tranferTo(). + On Linux DataNode takes 5 times less CPU while serving data. Results may + vary on other platforms. (rangadi) + + HADOOP-3248. Optimization of saveFSImage. (Dhruba via shv) + + HADOOP-3297. Fetch more task completion events from the job + tracker and task tracker. (ddas via omalley) + + HADOOP-3364. Faster image and log edits loading. (shv) + + HADOOP-3369. Fast block processing during name-node startup. (shv) + + HADOOP-1702. Reduce buffer copies when data is written to DFS. + DataNodes take 30% less CPU while writing data. (rangadi) + + HADOOP-3095. Speed up split generation in the FileInputSplit, + especially for non-HDFS file systems. Deprecates + InputFormat.validateInput. (tomwhite via omalley) + + HADOOP-3552. Add forrest documentation for Hadoop commands. + (Sharad Agarwal via cdouglas) + + BUG FIXES + + HADOOP-2905. 'fsck -move' triggers NPE in NameNode. + (Lohit Vjayarenu via rangadi) + + Increment ClientProtocol.versionID missed by HADOOP-2585. (shv) + + HADOOP-3254. Restructure internal namenode methods that process + heartbeats to use well-defined BlockCommand object(s) instead of + using the base java Object. (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-3176. Change lease record when a open-for-write-file + gets renamed. (dhruba) + + HADOOP-3269. Fix a case when namenode fails to restart + while processing a lease record. ((Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-3282. Port issues in TestCheckpoint resolved. (shv) + + HADOOP-3268. file:// URLs issue in TestUrlStreamHandler under Windows. + (taton) + + HADOOP-3127. Deleting files in trash should really remove them. + (Brice Arnould via omalley) + + HADOOP-3300. Fix locking of explicit locks in NetworkTopology. + (tomwhite via omalley) + + HADOOP-3270. Constant DatanodeCommands are stored in static final + immutable variables for better code clarity. + (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2793. Fix broken links for worst performing shuffle tasks in + the job history page. (Amareshwari Sriramadasu via ddas) + + HADOOP-3313. Avoid unnecessary calls to System.currentTimeMillis + in RPC::Invoker. (cdouglas) + + HADOOP-3318. Recognize "Darwin" as an alias for "Mac OS X" to + support Soylatte. (Sam Pullara via omalley) + + HADOOP-3301. Fix misleading error message when S3 URI hostname + contains an underscore. (tomwhite via omalley) + + HADOOP-3338. Fix Eclipse plugin to compile after HADOOP-544 was + committed. Updated all references to use the new JobID representation. + (taton via nigel) + + HADOOP-3337. Loading FSEditLog was broken by HADOOP-3283 since it + changed Writable serialization of DatanodeInfo. This patch handles it. + (Tsz Wo (Nicholas), SZE via rangadi) + + HADOOP-3101. Prevent JobClient from throwing an exception when printing + usage. (Edward J. Yoon via cdouglas) + + HADOOP-3119. Update javadoc for Text::getBytes to better describe its + behavior. (Tim Nelson via cdouglas) + + HADOOP-2294. Fix documentation in libhdfs to refer to the correct free + function. (Craig Macdonald via cdouglas) + + HADOOP-3335. Prevent the libhdfs build from deleting the wrong + files on make clean. (cutting via omalley) + + HADOOP-2930. Make {start,stop}-balancer.sh work even if hadoop-daemon.sh + is not in the PATH. (Spiros Papadimitriou via hairong) + + HADOOP-3085. Catch Exception in metrics util classes to ensure that + misconfigured metrics don't prevent others from updating. (cdouglas) + + HADOOP-3299. 
CompositeInputFormat should configure the sub-input + formats. (cdouglas via omalley) + + HADOOP-3309. Lower io.sort.mb and fs.inmemory.size.mb for MiniMRDFSSort + unit test so it passes on Windows. (lohit vijayarenu via cdouglas) + + HADOOP-3348. TestUrlStreamHandler should set URLStreamFactory after + DataNodes are initialized. (Lohit Vijayarenu via rangadi) + + HADOOP-3371. Ignore InstanceAlreadyExistsException from + MBeanUtil::registerMBean. (lohit vijayarenu via cdouglas) + + HADOOP-3349. A file rename was incorrectly changing the name inside a + lease record. (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-3365. Removes an unnecessary copy of the key from SegmentDescriptor + to MergeQueue. (Devaraj Das) + + HADOOP-3388. Fix for TestDatanodeBlockScanner to handle blocks with + generation stamps in them. (dhruba) + + HADOOP-3203. Fixes TaskTracker::localizeJob to pass correct file sizes + for the jarfile and the jobfile. (Amareshwari Sriramadasu via ddas) + + HADOOP-3391. Fix a findbugs warning introduced by HADOOP-3248 (rangadi) + + HADOOP-3393. Fix datanode shutdown to call DataBlockScanner::shutdown and + close its log, even if the scanner thread is not running. (lohit vijayarenu + via cdouglas) + + HADOOP-3399. A debug message was logged at info level. (rangadi) + + HADOOP-3396. TestDatanodeBlockScanner occasionally fails. + (Lohit Vijayarenu via rangadi) + + HADOOP-3339. Some of the failures on the 3rd datanode in the DFS write pipeline + are not detected properly. This could lead to hard failure of the client's + write operation. (rangadi) + + HADOOP-3409. Namenode should save the root inode into fsimage. (hairong) + + HADOOP-3296. Fix task cache to work for more than two levels in the cache + hierarchy. This also adds a new counter to track cache hits at levels + greater than two. (Amar Kamat via cdouglas) + + HADOOP-3375. Lease paths were sometimes not removed from + LeaseManager.sortedLeasesByPath. (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-3424. Values returned by getPartition should be checked to + make sure they are in the range 0 to #reduces - 1 (cdouglas via + omalley) + + HADOOP-3408. Change FSNamesystem to send its metrics as integers to + accommodate collectors that don't support long values. (lohit vijayarenu + via cdouglas) + + HADOOP-3403. Fixes a problem in the JobTracker to do with handling of lost + tasktrackers. (Arun Murthy via ddas) + + HADOOP-1318. Completed maps are not failed if the number of reducers is + zero. (Amareshwari Sriramadasu via ddas). + + HADOOP-3351. Fixes the history viewer tool to not do huge StringBuffer + allocations. (Amareshwari Sriramadasu via ddas) + + HADOOP-3419. Fixes TestFsck to wait for updates to happen before + checking results to make the test more reliable. (Lohit Vijaya + Renu via omalley) + + HADOOP-3259. Makes failure to read system properties due to a + security manager non-fatal. (Edward Yoon via omalley) + + HADOOP-3451. Update libhdfs to use FileSystem::getFileBlockLocations + instead of removed getFileCacheHints. (lohit vijayarenu via cdouglas) + + HADOOP-3401. Update FileBench to set the new + "mapred.work.output.dir" property to work post-3041. (cdouglas via omalley) + + HADOOP-2669. DFSClient locks pendingCreates appropriately. (dhruba) + + HADOOP-3410. Fix KFS implementation to return correct file + modification time. (Sriram Rao via cutting) + + HADOOP-3340. Fix DFS metrics for BlocksReplicated, HeartbeatsNum, and + BlockReportsAverageTime. (lohit vijayarenu via cdouglas) + + HADOOP-3435. 
Remove the assuption in the scripts that bash is at + /bin/bash and fix the test patch to require bash instead of sh. + (Brice Arnould via omalley) + + HADOOP-3471. Fix spurious errors from TestIndexedSort and add additional + logging to let failures be reproducible. (cdouglas) + + HADOOP-3443. Avoid copying map output across partitions when renaming a + single spill. (omalley via cdouglas) + + HADOOP-3454. Fix Text::find to search only valid byte ranges. (Chad Whipkey + via cdouglas) + + HADOOP-3417. Removes the static configuration variable, + commandLineConfig from JobClient. Moves the cli parsing from + JobShell to GenericOptionsParser. Thus removes the class + org.apache.hadoop.mapred.JobShell. (Amareshwari Sriramadasu via + ddas) + + HADOOP-2132. Only RUNNING/PREP jobs can be killed. (Jothi Padmanabhan + via ddas) + + HADOOP-3476. Code cleanup in fuse-dfs. + (Peter Wyckoff via dhruba) + + HADOOP-2427. Ensure that the cwd of completed tasks is cleaned-up + correctly on task-completion. (Amareshwari Sri Ramadasu via acmurthy) + + HADOOP-2565. Remove DFSPath cache of FileStatus. + (Tsz Wo (Nicholas), SZE via hairong) + + HADOOP-3326. Cleanup the local-fs and in-memory merge in the ReduceTask by + spawing only one thread each for the on-disk and in-memory merge. + (Sharad Agarwal via acmurthy) + + HADOOP-3493. Fix TestStreamingFailure to use FileUtil.fullyDelete to + ensure correct cleanup. (Lohit Vijayarenu via acmurthy) + + HADOOP-3455. Fix NPE in ipc.Client in case of connection failure and + improve its synchronization. (hairong) + + HADOOP-3240. Fix a testcase to not create files in the current directory. + Instead the file is created in the test directory (Mahadev Konar via ddas) + + HADOOP-3496. Fix failure in TestHarFileSystem.testArchives due to change + in HADOOP-3095. (tomwhite) + + HADOOP-3135. Get the system directory from the JobTracker instead of from + the conf. (Subramaniam Krishnan via ddas) + + HADOOP-3503. Fix a race condition when client and namenode start + simultaneous recovery of the same block. (dhruba & Tsz Wo + (Nicholas), SZE) + + HADOOP-3440. Fixes DistributedCache to not create symlinks for paths which + don't have fragments even when createSymLink is true. + (Abhijit Bagri via ddas) + + HADOOP-3463. Hadoop-daemons script should cd to $HADOOP_HOME. (omalley) + + HADOOP-3489. Fix NPE in SafeModeMonitor. (Lohit Vijayarenu via shv) + + HADOOP-3509. Fix NPE in FSNamesystem.close. (Tsz Wo (Nicholas), SZE via + shv) + + HADOOP-3491. Name-node shutdown causes InterruptedException in + ResolutionMonitor. (Lohit Vijayarenu via shv) + + HADOOP-3511. Fixes namenode image to not set the root's quota to an + invalid value when the quota was not saved in the image. (hairong) + + HADOOP-3516. Ensure the JobClient in HadoopArchives is initialized + with a configuration. (Subramaniam Krishnan via omalley) + + HADOOP-3513. Improve NNThroughputBenchmark log messages. (shv) + + HADOOP-3519. Fix NPE in DFS FileSystem rename. (hairong via tomwhite) + + HADOOP-3528. Metrics FilesCreated and files_deleted metrics + do not match. (Lohit via Mahadev) + + HADOOP-3418. When a directory is deleted, any leases that point to files + in the subdirectory are removed. ((Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-3542. Diables the creation of _logs directory for the archives + directory. (Mahadev Konar via ddas) + + HADOOP-3544. Fixes a documentation issue for hadoop archives. + (Mahadev Konar via ddas) + + HADOOP-3517. 
Fixes a problem in the reducer due to which the last InMemory + merge may be missed. (Arun Murthy via ddas) + + HADOOP-3548. Fixes build.xml to copy all *.jar files to the dist. + (Owen O'Malley via ddas) + + HADOOP-3363. Fix unformatted storage detection in FSImage. (shv) + + HADOOP-3560. Fixes a problem to do with split creation in archives. + (Mahadev Konar via ddas) + + HADOOP-3545. Fixes a overflow problem in archives. + (Mahadev Konar via ddas) + + HADOOP-3561. Prevent the trash from deleting its parent directories. + (cdouglas) + + HADOOP-3575. Fix the clover ant target after package refactoring. + (Nigel Daley via cdouglas) + + HADOOP-3539. Fix the tool path in the bin/hadoop script under + cygwin. (Tsz Wo (Nicholas), Sze via omalley) + + HADOOP-3520. TestDFSUpgradeFromImage triggers a race condition in the + Upgrade Manager. Fixed. (dhruba) + + HADOOP-3586. Provide deprecated, backwards compatibile semantics for the + combiner to be run once and only once on each record. (cdouglas) + + HADOOP-3533. Add deprecated methods to provide API compatibility + between 0.18 and 0.17. Remove the deprecated methods in trunk. (omalley) + + HADOOP-3580. Fixes a problem to do with specifying a har as an input to + a job. (Mahadev Konar via ddas) + + HADOOP-3333. Don't assign a task to a tasktracker that it failed to + execute earlier (used to happen in the case of lost tasktrackers where + the tasktracker would reinitialize and bind to a different port). + (Jothi Padmanabhan and Arun Murthy via ddas) + + HADOOP-3534. Log IOExceptions that happen in closing the name + system when the NameNode shuts down. (Tsz Wo (Nicholas) Sze via omalley) + + HADOOP-3546. TaskTracker re-initialization gets stuck in cleaning up. + (Amareshwari Sriramadasu via ddas) + + HADOOP-3576. Fix NullPointerException when renaming a directory + to its subdirectory. (Tse Wo (Nicholas), SZE via hairong) + + HADOOP-3320. Fix NullPointerException in NetworkTopology.getDistance(). + (hairong) + + HADOOP-3569. KFS input stream read() now correctly reads 1 byte + instead of 4. (Sriram Rao via omalley) + + HADOOP-3599. Fix JobConf::setCombineOnceOnly to modify the instance rather + than a parameter. (Owen O'Malley via cdouglas) + + HADOOP-3590. Null pointer exception in JobTracker when the task tracker is + not yet resolved. (Amar Ramesh Kamat via ddas) + + HADOOP-3603. Fix MapOutputCollector to spill when io.sort.spill.percent is + 1.0 and to detect spills when emitted records write no data. (cdouglas) + + HADOOP-3615. Set DatanodeProtocol.versionID to the correct value. + (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-3559. Fix the libhdfs test script and config to work with the + current semantics. (lohit vijayarenu via cdouglas) + + HADOOP-3480. Need to update Eclipse template to reflect current trunk. + (Brice Arnould via tomwhite) + + HADOOP-3588. Fixed usability issues with archives. (mahadev) + + HADOOP-3635. Uncaught exception in DataBlockScanner. + (Tsz Wo (Nicholas), SZE via hairong) + + HADOOP-3639. Exception when closing DFSClient while multiple files are + open. (Benjamin Gufler via hairong) + + HADOOP-3572. SetQuotas usage interface has some minor bugs. (hairong) + + HADOOP-3649. Fix bug in removing blocks from the corrupted block map. + (Lohit Vijayarenu via shv) + + HADOOP-3604. Work around a JVM synchronization problem observed while + retrieving the address of direct buffers from compression code by obtaining + a lock during this call. (Arun C Murthy via cdouglas) + + HADOOP-3683. 
Fix dfs metrics to count file listings rather than files + listed. (lohit vijayarenu via cdouglas) + + HADOOP-3597. Fix SortValidator to use filesystems other than the default as + input. Validation job still runs on default fs. + (Jothi Padmanabhan via cdouglas) + + HADOOP-3693. Fix archives, distcp and native library documentation to + conform to style guidelines. (Amareshwari Sriramadasu via cdouglas) + + HADOOP-3653. Fix test-patch target to properly account for Eclipse + classpath jars. (Brice Arnould via nigel) + + HADOOP-3692. Fix documentation for Cluster setup and Quick start guides. + (Amareshwari Sriramadasu via ddas) + + HADOOP-3691. Fix streaming and tutorial docs. (Jothi Padmanabhan via ddas) + + HADOOP-3630. Fix NullPointerException in CompositeRecordReader from empty + sources (cdouglas) + + HADOOP-3706. Fix a ClassLoader issue in the mapred.join Parser that + prevents it from loading user-specified InputFormats. + (Jingkei Ly via cdouglas) + + HADOOP-3718. Fix KFSOutputStream::write(int) to output a byte instead of + an int, per the OutputStream contract. (Sriram Rao via cdouglas) + + HADOOP-3647. Add debug logs to help track down a very occassional, + hard-to-reproduce, bug in shuffle/merge on the reducer. (acmurthy) + + HADOOP-3716. Prevent listStatus in KosmosFileSystem from returning + null for valid, empty directories. (Sriram Rao via cdouglas) + + HADOOP-3752. Fix audit logging to record rename events. (cdouglas) + + HADOOP-3737. Fix CompressedWritable to call Deflater::end to release + compressor memory. (Grant Glouser via cdouglas) + + HADOOP-3670. Fixes JobTracker to clear out split bytes when no longer + required. (Amareshwari Sriramadasu via ddas) + + HADOOP-3755. Update gridmix to work with HOD 0.4 (Runping Qi via cdouglas) + + HADOOP-3743. Fix -libjars, -files, -archives options to work even if + user code does not implement tools. (Amareshwari Sriramadasu via mahadev) + + HADOOP-3774. Fix typos in shell output. (Tsz Wo (Nicholas), SZE via + cdouglas) + + HADOOP-3762. Fixed FileSystem cache to work with the default port. (cutting + via omalley) + + HADOOP-3798. Fix tests compilation. (Mukund Madhugiri via omalley) + + HADOOP-3794. Return modification time instead of zero for KosmosFileSystem. + (Sriram Rao via cdouglas) + + HADOOP-3806. Remove debug statement to stdout from QuickSort. (cdouglas) + + HADOOP-3776. Fix NPE at NameNode when datanode reports a block after it is + deleted at NameNode. (rangadi) + + HADOOP-3537. Disallow adding a datanode to a network topology when its + network location is not resolved. (hairong) + + HADOOP-3571. Fix bug in block removal used in lease recovery. (shv) + + HADOOP-3645. MetricsTimeVaryingRate returns wrong value for + metric_avg_time. (Lohit Vijayarenu via hairong) + + HADOOP-3521. Reverted the missing cast to float for sending Counters' values + to Hadoop metrics which was removed by HADOOP-544. (acmurthy) + + HADOOP-3820. Fixes two problems in the gridmix-env - a syntax error, and a + wrong definition of USE_REAL_DATASET by default. (Arun Murthy via ddas) + + HADOOP-3724. Fixes two problems related to storing and recovering lease + in the fsimage. (dhruba) + + HADOOP-3827. Fixed compression of empty map-outputs. (acmurthy) + + HADOOP-3865. Remove reference to FSNamesystem from metrics preventing + garbage collection. (Lohit Vijayarenu via cdouglas) + + HADOOP-3884. Fix so that Eclipse plugin builds against recent + Eclipse releases. (cutting) + + HADOOP-3837. Streaming jobs report progress status. 
(dhruba) + + HADOOP-3897. Fix a NPE in secondary namenode. (Lohit Vijayarenu via + cdouglas) + + HADOOP-3901. Fix bin/hadoop to correctly set classpath under cygwin. + (Tsz Wo (Nicholas) Sze via omalley) + + HADOOP-3947. Fix a problem in tasktracker reinitialization. + (Amareshwari Sriramadasu via ddas) + +Release 0.17.3 - Unreleased + + IMPROVEMENTS + + HADOOP-4164. Chinese translation of the documentation. (Xuebing Yan via + omalley) + + BUG FIXES + + HADOOP-4277. Checksum verification was mistakenly disabled for + LocalFileSystem. (Raghu Angadi) + + HADOOP-4271. Checksum input stream can sometimes return invalid + data to the user. (Ning Li via rangadi) + + HADOOP-4318. DistCp should use absolute paths for cleanup. (szetszwo) + + HADOOP-4326. ChecksumFileSystem does not override create(...) correctly. + (szetszwo) + +Release 0.17.2 - 2008-08-11 + + BUG FIXES + + HADOOP-3678. Avoid spurious exceptions logged at DataNode when clients + read from DFS. (rangadi) + + HADOOP-3707. NameNode keeps a count of number of blocks scheduled + to be written to a datanode and uses it to avoid allocating more + blocks than a datanode can hold. (rangadi) + + HADOOP-3760. Fix a bug with HDFS file close() mistakenly introduced + by HADOOP-3681. (Lohit Vijayarenu via rangadi) + + HADOOP-3681. DFSClient can get into an infinite loop while closing + a file if there are some errors. (Lohit Vijayarenu via rangadi) + + HADOOP-3002. Hold off block removal while in safe mode. (shv) + + HADOOP-3685. Unbalanced replication target. (hairong) + + HADOOP-3758. Shutdown datanode on version mismatch instead of retrying + continuously, preventing excessive logging at the namenode. + (lohit vijayarenu via cdouglas) + + HADOOP-3633. Correct exception handling in DataXceiveServer, and throttle + the number of xceiver threads in a data-node. (shv) + + HADOOP-3370. Ensure that the TaskTracker.runningJobs data-structure is + correctly cleaned-up on task completion. (Zheng Shao via acmurthy) + + HADOOP-3813. Fix task-output clean-up on HDFS to use the recursive + FileSystem.delete rather than the FileUtil.fullyDelete. (Amareshwari + Sri Ramadasu via acmurthy) + + HADOOP-3859. Allow the maximum number of xceivers in the data node to + be configurable. (Johan Oskarsson via omalley) + + HADOOP-3931. Fix corner case in the map-side sort that causes some values + to be counted as too large and cause pre-mature spills to disk. Some values + will also bypass the combiner incorrectly. (cdouglas via omalley) + +Release 0.17.1 - 2008-06-23 + + INCOMPATIBLE CHANGES + + HADOOP-3565. Fix the Java serialization, which is not enabled by + default, to clear the state of the serializer between objects. + (tomwhite via omalley) + + IMPROVEMENTS + + HADOOP-3522. Improve documentation on reduce pointing out that + input keys and values will be reused. (omalley) + + HADOOP-3487. Balancer uses thread pools for managing its threads; + therefore provides better resource management. (hairong) + + BUG FIXES + + HADOOP-2159 Namenode stuck in safemode. The counter blockSafe should + not be decremented for invalid blocks. (hairong) + + HADOOP-3472 MapFile.Reader getClosest() function returns incorrect results + when before is true (Todd Lipcon via Stack) + + HADOOP-3442. Limit recursion depth on the stack for QuickSort to prevent + StackOverflowErrors. To avoid O(n*n) cases, when partitioning depth exceeds + a multiple of log(n), change to HeapSort. (cdouglas) + + HADOOP-3477. Fix build to not package contrib/*/bin twice in + distributions. 
(Adam Heath via cutting) + + HADOOP-3475. Fix MapTask to correctly size the accounting allocation of + io.sort.mb. (cdouglas) + + HADOOP-3550. Fix the serialization data structures in MapTask where the + value lengths are incorrectly calculated. (cdouglas) + + HADOOP-3526. Fix contrib/data_join framework by cloning values retained + in the reduce. (Spyros Blanas via cdouglas) + + HADOOP-1979. Speed up fsck by adding a buffered stream. (Lohit + Vijaya Renu via omalley) + +Release 0.17.0 - 2008-05-18 + + INCOMPATIBLE CHANGES + + HADOOP-2786. Move hbase out of hadoop core + + HADOOP-2345. New HDFS transactions to support appending + to files. Disk layout version changed from -11 to -12. (dhruba) + + HADOOP-2192. Error messages from "dfs mv" command improved. + (Mahadev Konar via dhruba) + + HADOOP-1902. "dfs du" command without any arguments operates on the + current working directory. (Mahadev Konar via dhruba) + + HADOOP-2873. Fixed bad disk format introduced by HADOOP-2345. + Disk layout version changed from -12 to -13. See changelist 630992 + (dhruba) + + HADOOP-1985. This addresses rack-awareness for Map tasks and for + HDFS in a uniform way. (ddas) + + HADOOP-1986. Add support for a general serialization mechanism for + Map Reduce. (tomwhite) + + HADOOP-771. FileSystem.delete() takes an explicit parameter that + specifies whether a recursive delete is intended. + (Mahadev Konar via dhruba) + + HADOOP-2470. Remove getContentLength(String), open(String, long, long) + and isDir(String) from ClientProtocol. ClientProtocol version changed + from 26 to 27. (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-2822. Remove deprecated code for classes InputFormatBase and + PhasedFileSystem. (Amareshwari Sriramadasu via enis) + + HADOOP-2116. Changes the layout of the task execution directory. + (Amareshwari Sriramadasu via ddas) + + HADOOP-2828. The following deprecated methods in Configuration.java + have been removed + getObject(String name) + setObject(String name, Object value) + get(String name, Object defaultValue) + set(String name, Object value) + Iterator entries() + (Amareshwari Sriramadasu via ddas) + + HADOOP-2824. Removes one deprecated constructor from MiniMRCluster. + (Amareshwari Sriramadasu via ddas) + + HADOOP-2823. Removes deprecated methods getColumn(), getLine() from + org.apache.hadoop.record.compiler.generated.SimpleCharStream. + (Amareshwari Sriramadasu via ddas) + + HADOOP-3060. Removes one unused constructor argument from MiniMRCluster. + (Amareshwari Sriramadasu via ddas) + + HADOOP-2854. Remove deprecated o.a.h.ipc.Server::getUserInfo(). + (lohit vijayarenu via cdouglas) + + HADOOP-2563. Remove deprecated FileSystem::listPaths. + (lohit vijayarenu via cdouglas) + + HADOOP-2818. Remove deprecated methods in Counters. + (Amareshwari Sriramadasu via tomwhite) + + HADOOP-2831. Remove deprecated o.a.h.dfs.INode::getAbsoluteName() + (lohit vijayarenu via cdouglas) + + HADOOP-2839. Remove deprecated FileSystem::globPaths. + (lohit vijayarenu via cdouglas) + + HADOOP-2634. Deprecate ClientProtocol::exists. + (lohit vijayarenu via cdouglas) + + HADOOP-2410. Make EC2 cluster nodes more independent of each other. + Multiple concurrent EC2 clusters are now supported, and nodes may be + added to a cluster on the fly with new nodes starting in the same EC2 + availability zone as the cluster. Ganglia monitoring and large + instance sizes have also been added. (Chris K Wensel via tomwhite) + + HADOOP-2826. Deprecated FileSplit.getFile(), LineRecordReader.readLine(). 
+ (Amareshwari Sriramadasu via ddas) + + HADOOP-3239. getFileInfo() returns null for non-existing files instead + of throwing FileNotFoundException. (Lohit Vijayarenu via shv) + + HADOOP-3266. Removed HOD changes from CHANGES.txt, as they are now inside + src/contrib/hod (Hemanth Yamijala via ddas) + + HADOOP-3280. Separate the configuration of the virtual memory size + (mapred.child.ulimit) from the jvm heap size, so that 64 bit + streaming applications are supported even when running with 32 bit + jvms. (acmurthy via omalley) + + NEW FEATURES + + HADOOP-1398. Add HBase in-memory block cache. (tomwhite) + + HADOOP-2178. Job History on DFS. (Amareshwari Sri Ramadasu via ddas) + + HADOOP-2063. A new parameter to dfs -get command to fetch a file + even if it is corrupted. (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2219. A new command "df -count" that counts the number of + files and directories. (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2906. Add an OutputFormat capable of using keys, values, and + config params to map records to different output files. + (Runping Qi via cdouglas) + + HADOOP-2346. Utilities to support timeout while writing to sockets. + DFSClient and DataNode sockets have 10min write timeout. (rangadi) + + HADOOP-2951. Add a contrib module that provides a utility to + build or update Lucene indexes using Map/Reduce. (Ning Li via cutting) + + HADOOP-1622. Allow multiple jar files for map reduce. + (Mahadev Konar via dhruba) + + HADOOP-2055. Allows users to set PathFilter on the FileInputFormat. + (Alejandro Abdelnur via ddas) + + HADOOP-2551. More environment variables like HADOOP_NAMENODE_OPTS + for better control of HADOOP_OPTS for each component. (rangadi) + + HADOOP-3001. Add job counters that measure the number of bytes + read and written to HDFS, S3, KFS, and local file systems. (omalley) + + HADOOP-3048. A new Interface and a default implementation to convert + and restore serializations of objects to/from strings. (enis) + + IMPROVEMENTS + + HADOOP-2655. Copy on write for data and metadata files in the + presence of snapshots. Needed for supporting appends to HDFS + files. (dhruba) + + HADOOP-1967. When a Path specifies the same scheme as the default + FileSystem but no authority, the default FileSystem's authority is + used. Also add warnings for old-format FileSystem names, accessor + methods for fs.default.name, and check for null authority in HDFS. + (cutting) + + HADOOP-2895. Let the profiling string be configurable. + (Martin Traverso via cdouglas) + + HADOOP-910. Enables Reduces to do merges for the on-disk map output files + in parallel with their copying. (Amar Kamat via ddas) + + HADOOP-730. Use rename rather than copy for local renames. (cdouglas) + + HADOOP-2810. Updated the Hadoop Core logo. (nigel) + + HADOOP-2057. Streaming should optionally treat a non-zero exit status + of a child process as a failed task. (Rick Cox via tomwhite) + + HADOOP-2765. Enables specifying ulimits for streaming/pipes tasks (ddas) + + HADOOP-2888. Make gridmix scripts more readily configurable and amenable + to automated execution. (Mukund Madhugiri via cdouglas) + + HADOOP-2908. A document that describes the DFS Shell command. + (Mahadev Konar via dhruba) + + HADOOP-2981. Update README.txt to reflect the upcoming use of + cryptography. (omalley) + + HADOOP-2804. Add support to publish CHANGES.txt as HTML when running + the Ant 'docs' target. (nigel) + + HADOOP-2559. 
Change DFS block placement to allocate the first replica + locally, the second off-rack, and the third intra-rack from the + second. (lohit vijayarenu via cdouglas) + + HADOOP-2939. Make the automated patch testing process an executable + Ant target, test-patch. (nigel) + + HADOOP-2239. Add HsftpFileSystem to permit transferring files over ssl. + (cdouglas) + + HADOOP-2886. Track individual RPC metrics. + (girish vaitheeswaran via dhruba) + + HADOOP-2373. Improvement in safe-mode reporting. (shv) + + HADOOP-3091. Modify FsShell command -put to accept multiple sources. + (Lohit Vijaya Renu via cdouglas) + + HADOOP-3092. Show counter values from job -status command. + (Tom White via ddas) + + HADOOP-1228. Ant task to generate Eclipse project files. (tomwhite) + + HADOOP-3093. Adds Configuration.getStrings(name, default-value) and + the corresponding setStrings. (Amareshwari Sriramadasu via ddas) + + HADOOP-3106. Adds documentation in forrest for debugging. + (Amareshwari Sriramadasu via ddas) + + HADOOP-3099. Add an option to distcp to preserve user, group, and + permission information. (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-2841. Unwrap AccessControlException and FileNotFoundException + from RemoteException for DFSClient. (shv) + + HADOOP-3152. Make index interval configuable when using + MapFileOutputFormat for map-reduce job. (Rong-En Fan via cutting) + + HADOOP-3143. Decrease number of slaves from 4 to 3 in TestMiniMRDFSSort, + as Hudson generates false negatives under the current load. + (Nigel Daley via cdouglas) + + HADOOP-3174. Illustrative example for MultipleFileInputFormat. (Enis + Soztutar via acmurthy) + + HADOOP-2993. Clarify the usage of JAVA_HOME in the Quick Start guide. + (acmurthy via nigel) + + HADOOP-3124. Make DataNode socket write timeout configurable. (rangadi) + + OPTIMIZATIONS + + HADOOP-2790. Fixed inefficient method hasSpeculativeTask by removing + repetitive calls to get the current time and late checking to see if + we want speculation on at all. (omalley) + + HADOOP-2758. Reduce buffer copies in DataNode when data is read from + HDFS, without negatively affecting read throughput. (rangadi) + + HADOOP-2399. Input key and value to combiner and reducer is reused. + (Owen O'Malley via ddas). + + HADOOP-2423. Code optimization in FSNamesystem.mkdirs. + (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2606. ReplicationMonitor selects data-nodes to replicate directly + from needed replication blocks instead of looking up for the blocks for + each live data-node. (shv) + + HADOOP-2148. Eliminate redundant data-node blockMap lookups. (shv) + + HADOOP-2027. Return the number of bytes in each block in a file + via a single rpc to the namenode to speed up job planning. + (Lohit Vijaya Renu via omalley) + + HADOOP-2902. Replace uses of "fs.default.name" with calls to the + accessor methods added in HADOOP-1967. (cutting) + + HADOOP-2119. Optimize scheduling of jobs with large numbers of + tasks by replacing static arrays with lists of runnable tasks. + (Amar Kamat via omalley) + + HADOOP-2919. Reduce the number of memory copies done during the + map output sorting. Also adds two config variables: + io.sort.spill.percent - the percentages of io.sort.mb that should + cause a spill (default 80%) + io.sort.record.percent - the percent of io.sort.mb that should + hold key/value indexes (default 5%) + (cdouglas via omalley) + + HADOOP-3140. Doesn't add a task in the commit queue if the task hadn't + generated any output. (Amar Kamat via ddas) + + HADOOP-3168. 
Reduce the amount of logging in streaming to an + exponentially increasing number of records (up to 10,000 + records/log). (Zheng Shao via omalley) + + BUG FIXES + + HADOOP-2195. '-mkdir' behaviour is now closer to Linux shell in case of + errors. (Mahadev Konar via rangadi) + + HADOOP-2190. bring behaviour '-ls' and '-du' closer to Linux shell + commands in case of errors. (Mahadev Konar via rangadi) + + HADOOP-2193. 'fs -rm' and 'fs -rmr' show error message when the target + file does not exist. (Mahadev Konar via rangadi) + + HADOOP-2738 Text is not subclassable because set(Text) and compareTo(Object) + access the other instance's private members directly. (jimk) + + HADOOP-2779. Remove the references to HBase in the build.xml. (omalley) + + HADOOP-2194. dfs cat on a non-existent file throws FileNotFoundException. + (Mahadev Konar via dhruba) + + HADOOP-2767. Fix for NetworkTopology erroneously skipping the last leaf + node on a rack. (Hairong Kuang and Mark Butler via dhruba) + + HADOOP-1593. FsShell works with paths in non-default FileSystem. + (Mahadev Konar via dhruba) + + HADOOP-2191. du and dus command on non-existent directory gives + appropriate error message. (Mahadev Konar via dhruba) + + HADOOP-2832. Remove tabs from code of DFSClient for better + indentation. (dhruba) + + HADOOP-2844. distcp closes file handles for sequence files. + (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2727. Fix links in Web UI of the hadoop daemons and some docs + (Amareshwari Sri Ramadasu via ddas) + + HADOOP-2871. Fixes a problem to do with file: URI in the JobHistory init. + (Amareshwari Sri Ramadasu via ddas) + + HADOOP-2800. Deprecate SetFile.Writer constructor not the whole class. + (Johan Oskarsson via tomwhite) + + HADOOP-2891. DFSClient.close() closes all open files. (dhruba) + + HADOOP-2845. Fix dfsadmin disk utilization report on Solaris. + (Martin Traverso via tomwhite) + + HADOOP-2912. MiniDFSCluster restart should wait for namenode to exit + safemode. This was causing TestFsck to fail. (Mahadev Konar via dhruba) + + HADOOP-2820. The following classes in streaming are removed : + StreamLineRecordReader StreamOutputFormat StreamSequenceRecordReader. + (Amareshwari Sri Ramadasu via ddas) + + HADOOP-2819. The following methods in JobConf are removed: + getInputKeyClass() setInputKeyClass getInputValueClass() + setInputValueClass(Class theClass) setSpeculativeExecution + getSpeculativeExecution() (Amareshwari Sri Ramadasu via ddas) + + HADOOP-2817. Removes deprecated mapred.tasktracker.tasks.maximum and + ClusterStatus.getMaxTasks(). (Amareshwari Sri Ramadasu via ddas) + + HADOOP-2821. Removes deprecated ShellUtil and ToolBase classes from + the util package. (Amareshwari Sri Ramadasu via ddas) + + HADOOP-2934. The namenode was encountreing a NPE while loading + leases from the fsimage. Fixed. (dhruba) + + HADOOP-2938. Some fs commands did not glob paths. + (Tsz Wo (Nicholas), SZE via rangadi) + + HADOOP-2943. Compression of intermediate map output causes failures + in the merge. (cdouglas) + + HADOOP-2870. DataNode and NameNode closes all connections while + shutting down. (Hairong Kuang via dhruba) + + HADOOP-2973. Fix TestLocalDFS for Windows platform. + (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2971. select multiple times if it returns early in + SocketIOWithTimeout. (rangadi) + + HADOOP-2955. Fix TestCrcCorruption test failures caused by HADOOP-2758 + (rangadi) + + HADOOP-2657. A flush call on the DFSOutputStream flushes the last + partial CRC chunk too. 
(dhruba) + + HADOOP-2974. IPC unit tests used "0.0.0.0" to connect to server, which + is not always supported. (rangadi) + + HADOOP-2996. Fixes uses of StringBuffer in StreamUtils class. + (Dave Brosius via ddas) + + HADOOP-2995. Fixes StreamBaseRecordReader's getProgress to return a + floating point number. (Dave Brosius via ddas) + + HADOOP-2972. Fix for a NPE in FSDataset.invalidate. + (Mahadev Konar via dhruba) + + HADOOP-2994. Code cleanup for DFSClient: remove redundant + conversions from string to string. (Dave Brosius via dhruba) + + HADOOP-3009. TestFileCreation sometimes fails because restarting + minidfscluster sometimes creates datanodes with ports that are + different from their original instance. (dhruba) + + HADOOP-2992. Distributed Upgrade framework works correctly with + more than one upgrade object. (Konstantin Shvachko via dhruba) + + HADOOP-2679. Fix a typo in libhdfs. (Jason via dhruba) + + HADOOP-2976. When a lease expires, the Namenode ensures that + blocks of the file are adequately replicated. (dhruba) + + HADOOP-2901. Fixes the creation of info servers in the JobClient + and JobTracker. Removes the creation from JobClient and removes + additional info server from the JobTracker. Also adds the command + line utility to view the history files (HADOOP-2896), and fixes + bugs in JSPs to do with analysis - HADOOP-2742, HADOOP-2792. + (Amareshwari Sri Ramadasu via ddas) + + HADOOP-2890. If different datanodes report the same block but + with different sizes to the namenode, the namenode picks the + replica(s) with the largest size as the only valid replica(s). (dhruba) + + HADOOP-2825. Deprecated MapOutputLocation.getFile() is removed. + (Amareshwari Sri Ramadasu via ddas) + + HADOOP-2806. Fixes a streaming document. + (Amareshwari Sriramadasu via ddas) + + HADOOP-3008. SocketIOWithTimeout throws InterruptedIOException if the + thread is interrupted while it is waiting. (rangadi) + + HADOOP-3006. Fix wrong packet size reported by DataNode when a block + is being replicated. (rangadi) + + HADOOP-3029. Datanode prints log message "firstbadlink" only if + it detects a bad connection to another datanode in the pipeline. (dhruba) + + HADOOP-3030. Release reserved space for file in InMemoryFileSystem if + checksum reservation fails. (Devaraj Das via cdouglas) + + HADOOP-3036. Fix findbugs warnings in UpgradeUtilities. (Konstantin + Shvachko via cdouglas) + + HADOOP-3025. ChecksumFileSystem supports the delete method with + the recursive flag. (Mahadev Konar via dhruba) + + HADOOP-3012. dfs -mv file to user home directory throws exception if + the user home directory does not exist. (Mahadev Konar via dhruba) + + HADOOP-3066. Should not require superuser privilege to query if hdfs is in + safe mode (jimk) + + HADOOP-3040. If the input line starts with the separator char, the key + is set as empty. (Amareshwari Sriramadasu via ddas) + + HADOOP-3080. Removes flush calls from JobHistory. + (Amareshwari Sriramadasu via ddas) + + HADOOP-3086. Adds the testcase missed during commit of hadoop-3040. + (Amareshwari Sriramadasu via ddas) + + HADOOP-3046. Fix the raw comparators for Text and BytesWritables + to use the provided length rather than recompute it. (omalley) + + HADOOP-3094. Fix BytesWritable.toString to avoid extending the sign bit + (Owen O'Malley via cdouglas) + + HADOOP-3067. DFSInputStream's position read does not close the sockets. + (rangadi) + + HADOOP-3073. close() on SocketInputStream or SocketOutputStream should + close the underlying channel. 
(rangadi)
+
+ HADOOP-3087. Fixes a problem to do with refreshing of loadHistory.jsp.
+ (Amareshwari Sriramadasu via ddas)
+
+ HADOOP-3065. Better logging message if the rack location of a datanode
+ cannot be determined. (Devaraj Das via dhruba)
+
+ HADOOP-3064. Commas in a file path should not be treated as delimiters.
+ (Hairong Kuang via shv)
+
+ HADOOP-2997. Adds test for non-writable serializer. Also fixes a problem
+ introduced by HADOOP-2399. (Tom White via ddas)
+
+ HADOOP-3114. Fix TestDFSShell on Windows. (Lohit Vijaya Renu via cdouglas)
+
+ HADOOP-3118. Fix Namenode NPE while loading fsimage after a cluster
+ upgrade from older disk format. (dhruba)
+
+ HADOOP-3161. Fix FileUtil.HardLink.getLinkCount on Mac OS. (nigel
+ via omalley)
+
+ HADOOP-2927. Fix TestDU to accurately calculate the expected file size.
+ (shv via nigel)
+
+ HADOOP-3123. Fix the native library build scripts to work on Solaris.
+ (tomwhite via omalley)
+
+ HADOOP-3089. Streaming should accept stderr from task before
+ first key arrives. (Rick Cox via tomwhite)
+
+ HADOOP-3146. A DFSOutputStream.flush method is renamed as
+ DFSOutputStream.fsync. (dhruba)
+
+ HADOOP-3165. -put/-copyFromLocal did not treat input file "-" as stdin.
+ (Lohit Vijayarenu via rangadi)
+
+ HADOOP-3041. Deprecate JobConf.setOutputPath and JobConf.getOutputPath.
+ Deprecate OutputFormatBase. Add FileOutputFormat. Existing output formats
+ extending OutputFormatBase, now extend FileOutputFormat. Add the following
+ APIs in FileOutputFormat: setOutputPath, getOutputPath, getWorkOutputPath.
+ (Amareshwari Sriramadasu via nigel)
+
+ HADOOP-3083. The fsimage does not store leases. This would have to be
+ reworked in the next release to support appends. (dhruba)
+
+ HADOOP-3166. Fix an ArrayIndexOutOfBoundsException in the spill thread
+ and make exception handling more promiscuous to catch this condition.
+ (cdouglas)
+
+ HADOOP-3050. DataNode sends one and only one block report after
+ it registers with the namenode. (Hairong Kuang)
+
+ HADOOP-3044. NNBench sets the right configuration for the mapper.
+ (Hairong Kuang)
+
+ HADOOP-3178. Fix GridMix scripts for small and medium jobs
+ to handle input paths differently. (Mukund Madhugiri via nigel)
+
+ HADOOP-1911. Fix an infinite loop in DFSClient when all replicas of a
+ block are bad (cdouglas)
+
+ HADOOP-3157. Fix path handling in DistributedCache and TestMiniMRLocalFS.
+ (Doug Cutting via rangadi)
+
+ HADOOP-3018. Fix the eclipse plug-in contrib wrt removed deprecated
+ methods (taton)
+
+ HADOOP-3183. Fix TestJobShell to use 'ls' instead of java.io.File::exists
+ since cygwin symlinks are unsupported.
+ (Mahadev konar via cdouglas)
+
+ HADOOP-3175. Fix FsShell.CommandFormat to handle "-" in arguments.
+ (Edward J. Yoon via rangadi)
+
+ HADOOP-3220. Safemode message corrected. (shv)
+
+ HADOOP-3208. Fix WritableDeserializer to set the Configuration on
+ deserialized Writables. (Enis Soztutar via cdouglas)
+
+ HADOOP-3224. 'dfs -du /dir' does not return correct size.
+ (Lohit Vijayarenu via rangadi)
+
+ HADOOP-3223. Fix typo in help message for -chmod. (rangadi)
+
+ HADOOP-1373. checkPath() should ignore case when it compares authority.
+ (Edward J. Yoon via rangadi)
+
+ HADOOP-3204. Fixes a problem to do with ReduceTask's LocalFSMerger not
+ catching Throwable. (Amar Ramesh Kamat via ddas)
+
+ HADOOP-3229. Report progress when collecting records from the mapper and
+ the combiner. (Doug Cutting via cdouglas)
+
+ HADOOP-3225.
Unwrapping methods of RemoteException should initialize + detailedMassage field. (Mahadev Konar, shv, cdouglas) + + HADOOP-3247. Fix gridmix scripts to use the correct globbing syntax and + change maxentToSameCluster to run the correct number of jobs. + (Runping Qi via cdouglas) + + HADOOP-3242. Fix the RecordReader of SequenceFileAsBinaryInputFormat to + correctly read from the start of the split and not the beginning of the + file. (cdouglas via acmurthy) + + HADOOP-3256. Encodes the job name used in the filename for history files. + (Arun Murthy via ddas) + + HADOOP-3162. Ensure that comma-separated input paths are treated correctly + as multiple input paths. (Amareshwari Sri Ramadasu via acmurthy) + + HADOOP-3263. Ensure that the job-history log file always follows the + pattern of hostname_timestamp_jobid_username_jobname even if username + and/or jobname are not specfied. This helps to avoid wrong assumptions + made about the job-history log filename in jobhistory.jsp. (acmurthy) + + HADOOP-3251. Fixes getFilesystemName in JobTracker and LocalJobRunner to + use FileSystem.getUri instead of FileSystem.getName. (Arun Murthy via ddas) + + HADOOP-3237. Fixes TestDFSShell.testErrOutPut on Windows platform. + (Mahadev Konar via ddas) + + HADOOP-3279. TaskTracker checks for SUCCEEDED task status in addition to + COMMIT_PENDING status when it fails maps due to lost map. + (Devaraj Das) + + HADOOP-3286. Prevent collisions in gridmix output dirs by increasing the + granularity of the timestamp. (Runping Qi via cdouglas) + + HADOOP-3285. Fix input split locality when the splits align to + fs blocks. (omalley) + + HADOOP-3372. Fix heap management in streaming tests. (Arun Murthy via + cdouglas) + + HADOOP-3031. Fix javac warnings in test classes. (cdouglas) + + HADOOP-3382. Fix memory leak when files are not cleanly closed (rangadi) + + HADOOP-3322. Fix to push MetricsRecord for rpc metrics. (Eric Yang via + mukund) + +Release 0.16.4 - 2008-05-05 + + BUG FIXES + + HADOOP-3138. DFS mkdirs() should not throw an exception if the directory + already exists. (rangadi via mukund) + + HADOOP-3294. Fix distcp to check the destination length and retry the copy + if it doesn't match the src length. (Tsz Wo (Nicholas), SZE via mukund) + + HADOOP-3186. Fix incorrect permission checkding for mv and renameTo + in HDFS. (Tsz Wo (Nicholas), SZE via mukund) + +Release 0.16.3 - 2008-04-16 + + BUG FIXES + + HADOOP-3010. Fix ConcurrentModificationException in ipc.Server.Responder. + (rangadi) + + HADOOP-3154. Catch all Throwables from the SpillThread in MapTask, rather + than IOExceptions only. (ddas via cdouglas) + + HADOOP-3159. Avoid file system cache being overwritten whenever + configuration is modified. (Tsz Wo (Nicholas), SZE via hairong) + + HADOOP-3139. Remove the consistency check for the FileSystem cache in + closeAll() that causes spurious warnings and a deadlock. + (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-3195. Fix TestFileSystem to be deterministic. + (Tsz Wo (Nicholas), SZE via cdouglas) + + HADOOP-3069. Primary name-node should not truncate image when transferring + it from the secondary. (shv) + + HADOOP-3182. Change permissions of the job-submission directory to 777 + from 733 to ensure sharing of HOD clusters works correctly. (Tsz Wo + (Nicholas), Sze and Amareshwari Sri Ramadasu via acmurthy) + +Release 0.16.2 - 2008-04-02 + + BUG FIXES + + HADOOP-3011. Prohibit distcp from overwriting directories on the + destination filesystem with files. (cdouglas) + + HADOOP-3033. 
The BlockReceiver thread in the datanode writes data to + the block file, changes file position (if needed) and flushes all by + itself. The PacketResponder thread does not flush block file. (dhruba) + + HADOOP-2978. Fixes the JobHistory log format for counters. + (Runping Qi via ddas) + + HADOOP-2985. Fixes LocalJobRunner to tolerate null job output path. + Also makes the _temporary a constant in MRConstants.java. + (Amareshwari Sriramadasu via ddas) + + HADOOP-3003. FileSystem cache key is updated after a + FileSystem object is created. (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-3042. Updates the Javadoc in JobConf.getOutputPath to reflect + the actual temporary path. (Amareshwari Sriramadasu via ddas) + + HADOOP-3007. Tolerate mirror failures while DataNode is replicating + blocks as it used to before. (rangadi) + + HADOOP-2944. Fixes a "Run on Hadoop" wizard NPE when creating a + Location from the wizard. (taton) + + HADOOP-3049. Fixes a problem in MultiThreadedMapRunner to do with + catching RuntimeExceptions. (Alejandro Abdelnur via ddas) + + HADOOP-3039. Fixes a problem to do with exceptions in tasks not + killing jobs. (Amareshwari Sriramadasu via ddas) + + HADOOP-3027. Fixes a problem to do with adding a shutdown hook in + FileSystem. (Amareshwari Sriramadasu via ddas) + + HADOOP-3056. Fix distcp when the target is an empty directory by + making sure the directory is created first. (cdouglas and acmurthy + via omalley) + + HADOOP-3070. Protect the trash emptier thread from null pointer + exceptions. (Koji Noguchi via omalley) + + HADOOP-3084. Fix HftpFileSystem to work for zero-lenghth files. + (cdouglas) + + HADOOP-3107. Fix NPE when fsck invokes getListings. (dhruba) + + HADOOP-3104. Limit MultithreadedMapRunner to have a fixed length queue + between the RecordReader and the map threads. (Alejandro Abdelnur via + omalley) + + HADOOP-2833. Do not use "Dr. Who" as the default user in JobClient. + A valid user name is required. (Tsz Wo (Nicholas), SZE via rangadi) + + HADOOP-3128. Throw RemoteException in setPermissions and setOwner of + DistributedFileSystem. (shv via nigel) + +Release 0.16.1 - 2008-03-13 + + INCOMPATIBLE CHANGES + + HADOOP-2869. Deprecate SequenceFile.setCompressionType in favor of + SequenceFile.createWriter, SequenceFileOutputFormat.setCompressionType, + and JobConf.setMapOutputCompressionType. (Arun C Murthy via cdouglas) + Configuration changes to hadoop-default.xml: + deprecated io.seqfile.compression.type + + IMPROVEMENTS + + HADOOP-2371. User guide for file permissions in HDFS. + (Robert Chansler via rangadi) + + HADOOP-3098. Allow more characters in user and group names while + using -chown and -chgrp commands. (rangadi) + + BUG FIXES + + HADOOP-2789. Race condition in IPC Server Responder that could close + connections early. (Raghu Angadi) + + HADOOP-2785. minor. Fix a typo in Datanode block verification + (Raghu Angadi) + + HADOOP-2788. minor. Fix help message for chgrp shell command (Raghu Angadi). + + HADOOP-1188. fstime file is updated when a storage directory containing + namespace image becomes inaccessible. (shv) + + HADOOP-2787. An application can set a configuration variable named + dfs.umask to set the umask that is used by DFS. + (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2780. The default socket buffer size for DataNodes is 128K. + (dhruba) + + HADOOP-2716. Superuser privileges for the Balancer. + (Tsz Wo (Nicholas), SZE via shv) + + HADOOP-2754. Filter out .crc files from local file system listing. 
+ (Hairong Kuang via shv)
+
+ HADOOP-2733. Fix compiler warnings in test code.
+ (Tsz Wo (Nicholas), SZE via cdouglas)
+
+ HADOOP-2725. Modify distcp to avoid leaving partially copied files at
+ the destination after encountering an error. (Tsz Wo (Nicholas), SZE
+ via cdouglas)
+
+ HADOOP-2391. Cleanup job output directory before declaring a job as
+ SUCCESSFUL. (Amareshwari Sri Ramadasu via ddas)
+
+ HADOOP-2808. Minor fix to FileUtil::copy to mind the overwrite
+ formal. (cdouglas)
+
+ HADOOP-2683. Moving UGI out of the RPC Server.
+ (Tsz Wo (Nicholas), SZE via shv)
+
+ HADOOP-2814. Fix for NPE in datanode in unit test TestDataTransferProtocol.
+ (Raghu Angadi via dhruba)
+
+ HADOOP-2811. Dump of counters in job history does not add comma between
+ groups. (runping via omalley)
+
+ HADOOP-2735. Enables setting TMPDIR for tasks.
+ (Amareshwari Sri Ramadasu via ddas)
+
+ HADOOP-2843. Fix protections on map-side join classes to enable derivation.
+ (cdouglas via omalley)
+
+ HADOOP-2840. Fix gridmix scripts to correctly invoke the java sort through
+ the proper jar. (Mukund Madhugiri via cdouglas)
+
+ HADOOP-2769. TestNNThroughputBenchmark should not use a fixed port for
+ the namenode http port. (omalley)
+
+ HADOOP-2852. Update gridmix benchmark to avoid an artificially long tail.
+ (cdouglas)
+
+ HADOOP-2894. Fix a problem to do with tasktrackers failing to connect to
+ JobTracker upon reinitialization. (Owen O'Malley via ddas).
+
+ HADOOP-2903. Fix exception generated by Metrics while using pushMetric().
+ (girish vaitheeswaran via dhruba)
+
+ HADOOP-2904. Fix to RPC metrics to log the correct host name.
+ (girish vaitheeswaran via dhruba)
+
+ HADOOP-2918. Improve error logging so that dfs writes failure with
+ "No lease on file" can be diagnosed. (dhruba)
+
+ HADOOP-2923. Add SequenceFileAsBinaryInputFormat, which was
+ missed in the commit for HADOOP-2603. (cdouglas via omalley)
+
+ HADOOP-2931. IOException thrown by DFSOutputStream had wrong stack
+ trace in some cases. (Michael Bieniosek via rangadi)
+
+ HADOOP-2883. Write failures and data corruptions on HDFS files.
+ The write timeout is back to what it was on 0.15 release. Also, the
+ datanodes flush the block file buffered output stream before
+ sending a positive ack for the packet back to the client. (dhruba)
+
+ HADOOP-2756. NPE in DFSClient while closing DFSOutputStreams
+ under load. (rangadi)
+
+ HADOOP-2958. Fixed FileBench which broke due to HADOOP-2391 which performs
+ a check for existence of the output directory and a trivial bug in
+ GenericMRLoadGenerator where min/max word lengths were identical since
+ they were looking at the same config variables (Chris Douglas via
+ acmurthy)
+
+ HADOOP-2915. Fixed FileSystem.CACHE so that a username is included
+ in the cache key. (Tsz Wo (Nicholas), SZE via nigel)
+
+ HADOOP-2813. TestDU unit test uses its own directory to run its
+ sequence of tests. (Mahadev Konar via dhruba)
+
+Release 0.16.0 - 2008-02-07
+
+ INCOMPATIBLE CHANGES
+
+ HADOOP-1245. Use the mapred.tasktracker.tasks.maximum value
+ configured on each tasktracker when allocating tasks, instead of
+ the value configured on the jobtracker. InterTrackerProtocol
+ version changed from 5 to 6. (Michael Bieniosek via omalley)
+
+ HADOOP-1843. Removed code from Configuration and JobConf deprecated by
+ HADOOP-785 and a minor fix to Configuration.toString. Specifically the
+ important change is that mapred-default.xml is no longer supported and
+ Configuration no longer supports the notion of default/final resources.
+ (acmurthy) + + HADOOP-1302. Remove deprecated abacus code from the contrib directory. + This also fixes a configuration bug in AggregateWordCount, so that the + job now works. (enis) + + HADOOP-2288. Enhance FileSystem API to support access control. + (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2184. RPC Support for user permissions and authentication. + (Raghu Angadi via dhruba) + + HADOOP-2185. RPC Server uses any available port if the specified + port is zero. Otherwise it uses the specified port. Also combines + the configuration attributes for the servers' bind address and + port from "x.x.x.x" and "y" to "x.x.x.x:y". + Deprecated configuration variables: + dfs.info.bindAddress + dfs.info.port + dfs.datanode.bindAddress + dfs.datanode.port + dfs.datanode.info.bindAdress + dfs.datanode.info.port + dfs.secondary.info.bindAddress + dfs.secondary.info.port + mapred.job.tracker.info.bindAddress + mapred.job.tracker.info.port + mapred.task.tracker.report.bindAddress + tasktracker.http.bindAddress + tasktracker.http.port + New configuration variables (post HADOOP-2404): + dfs.secondary.http.address + dfs.datanode.address + dfs.datanode.http.address + dfs.http.address + mapred.job.tracker.http.address + mapred.task.tracker.report.address + mapred.task.tracker.http.address + (Konstantin Shvachko via dhruba) + + HADOOP-2401. Only the current leaseholder can abandon a block for + a HDFS file. ClientProtocol version changed from 20 to 21. + (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2381. Support permission information in FileStatus. Client + Protocol version changed from 21 to 22. (Raghu Angadi via dhruba) + + HADOOP-2110. Block report processing creates fewer transient objects. + Datanode Protocol version changed from 10 to 11. + (Sanjay Radia via dhruba) + + HADOOP-2567. Add FileSystem#getHomeDirectory(), which returns the + user's home directory in a FileSystem as a fully-qualified path. + FileSystem#getWorkingDirectory() is also changed to return a + fully-qualified path, which can break applications that attempt + to, e.g., pass LocalFileSystem#getWorkingDir().toString() directly + to java.io methods that accept file names. (cutting) + + HADOOP-2514. Change trash feature to maintain a per-user trash + directory, named ".Trash" in the user's home directory. The + "fs.trash.root" parameter is no longer used. Full source paths + are also no longer reproduced within the trash. + + HADOOP-2012. Periodic data verification on Datanodes. + (Raghu Angadi via dhruba) + + HADOOP-1707. The DFSClient does not use a local disk file to cache + writes to a HDFS file. Changed Data Transfer Version from 7 to 8. + (dhruba) + + HADOOP-2652. Fix permission issues for HftpFileSystem. This is an + incompatible change since distcp may not be able to copy files + from cluster A (compiled with this patch) to cluster B (compiled + with previous versions). (Tsz Wo (Nicholas), SZE via dhruba) + + NEW FEATURES + + HADOOP-1857. Ability to run a script when a task fails to capture stack + traces. (Amareshwari Sri Ramadasu via ddas) + + HADOOP-2299. Defination of a login interface. A simple implementation for + Unix users and groups. (Hairong Kuang via dhruba) + + HADOOP-1652. A utility to balance data among datanodes in a HDFS cluster. + (Hairong Kuang via dhruba) + + HADOOP-2085. A library to support map-side joins of consistently + partitioned and sorted data sets. (Chris Douglas via omalley) + + HADOOP-2336. Shell commands to modify file permissions. (rangadi) + + HADOOP-1298. 
Implement file permissions for HDFS. + (Tsz Wo (Nicholas) & taton via cutting) + + HADOOP-2447. HDFS can be configured to limit the total number of + objects (inodes and blocks) in the file system. (dhruba) + + HADOOP-2487. Added an option to get statuses for all submitted/run jobs. + This information can be used to develop tools for analysing jobs. + (Amareshwari Sri Ramadasu via acmurthy) + + HADOOP-1873. Implement user permissions for Map/Reduce framework. + (Hairong Kuang via shv) + + HADOOP-2532. Add to MapFile a getClosest method that returns the key + that comes just before if the key is not present. (stack via tomwhite) + + HADOOP-1883. Add versioning to Record I/O. (Vivek Ratan via ddas) + + HADOOP-2603. Add SeqeunceFileAsBinaryInputFormat, which reads + sequence files as BytesWritable/BytesWritable regardless of the + key and value types used to write the file. (cdouglas via omalley) + + HADOOP-2367. Add ability to profile a subset of map/reduce tasks and fetch + the result to the local filesystem of the submitting application. Also + includes a general IntegerRanges extension to Configuration for setting + positive, ranged parameters. (Owen O'Malley via cdouglas) + + IMPROVEMENTS + + HADOOP-2045. Change committer list on website to a table, so that + folks can list their organization, timezone, etc. (cutting) + + HADOOP-2058. Facilitate creating new datanodes dynamically in + MiniDFSCluster. (Hairong Kuang via dhruba) + + HADOOP-1855. fsck verifies block placement policies and reports + violations. (Konstantin Shvachko via dhruba) + + HADOOP-1604. An system administrator can finalize namenode upgrades + without running the cluster. (Konstantin Shvachko via dhruba) + + HADOOP-1839. Link-ify the Pending/Running/Complete/Killed grid in + jobdetails.jsp to help quickly narrow down and see categorized TIPs' + details via jobtasks.jsp. (Amar Kamat via acmurthy) + + HADOOP-1210. Log counters in job history. (Owen O'Malley via ddas) + + HADOOP-1912. Datanode has two new commands COPY and REPLACE. These are + needed for supporting data rebalance. (Hairong Kuang via dhruba) + + HADOOP-2086. This patch adds the ability to add dependencies to a job + (run via JobControl) after construction. (Adrian Woodhead via ddas) + + HADOOP-1185. Support changing the logging level of a server without + restarting the server. (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2134. Remove developer-centric requirements from overview.html and + keep it end-user focussed, specifically sections related to subversion and + building Hadoop. (Jim Kellerman via acmurthy) + + HADOOP-1989. Support simulated DataNodes. This helps creating large virtual + clusters for testing purposes. (Sanjay Radia via dhruba) + + HADOOP-1274. Support different number of mappers and reducers per + TaskTracker to allow administrators to better configure and utilize + heterogenous clusters. + Configuration changes to hadoop-default.xml: + add mapred.tasktracker.map.tasks.maximum (default value of 2) + add mapred.tasktracker.reduce.tasks.maximum (default value of 2) + remove mapred.tasktracker.tasks.maximum (deprecated for 0.16.0) + (Amareshwari Sri Ramadasu via acmurthy) + + HADOOP-2104. Adds a description to the ant targets. This makes the + output of "ant -projecthelp" sensible. (Chris Douglas via ddas) + + HADOOP-2127. Added a pipes sort example to benchmark trivial pipes + application versus trivial java application. (omalley via acmurthy) + + HADOOP-2113. 
A new shell command "dfs -text" to view the contents of + a gziped or SequenceFile. (Chris Douglas via dhruba) + + HADOOP-2207. Add a "package" target for contrib modules that + permits each to determine what files are copied into release + builds. (stack via cutting) + + HADOOP-1984. Makes the backoff for failed fetches exponential. + Earlier, it was a random backoff from an interval. + (Amar Kamat via ddas) + + HADOOP-1327. Include website documentation for streaming. (Rob Weltman + via omalley) + + HADOOP-2000. Rewrite NNBench to measure namenode performance accurately. + It now uses the map-reduce framework for load generation. + (Mukund Madhugiri via dhruba) + + HADOOP-2248. Speeds up the framework w.r.t Counters. Also has API + updates to the Counters part. (Owen O'Malley via ddas) + + HADOOP-2326. The initial block report at Datanode startup time has + a random backoff period. (Sanjay Radia via dhruba) + + HADOOP-2432. HDFS includes the name of the file while throwing + "File does not exist" exception. (Jim Kellerman via dhruba) + + HADOOP-2457. Added a 'forrest.home' property to the 'docs' target in + build.xml. (acmurthy) + + HADOOP-2149. A new benchmark for three name-node operation: file create, + open, and block report, to evaluate the name-node performance + for optimizations or new features. (Konstantin Shvachko via shv) + + HADOOP-2466. Change FileInputFormat.computeSplitSize to a protected + non-static method to allow sub-classes to provide alternate + implementations. (Alejandro Abdelnur via acmurthy) + + HADOOP-2425. Change TextOutputFormat to handle Text specifically for better + performance. Make NullWritable implement Comparable. Make TextOutputFormat + treat NullWritable like null. (omalley) + + HADOOP-1719. Improves the utilization of shuffle copier threads. + (Amar Kamat via ddas) + + HADOOP-2390. Added documentation for user-controls for intermediate + map-outputs & final job-outputs and native-hadoop libraries. (acmurthy) + + HADOOP-1660. Add the cwd of the map/reduce task to the java.library.path + of the child-jvm to support loading of native libraries distributed via + the DistributedCache. (acmurthy) + + HADOOP-2285. Speeds up TextInputFormat. Also includes updates to the + Text API. (Owen O'Malley via cdouglas) + + HADOOP-2233. Adds a generic load generator for modeling MR jobs. (cdouglas) + + HADOOP-2369. Adds a set of scripts for simulating a mix of user map/reduce + workloads. (Runping Qi via cdouglas) + + HADOOP-2547. Removes use of a 'magic number' in build.xml. + (Hrishikesh via nigel) + + HADOOP-2268. Fix org.apache.hadoop.mapred.jobcontrol classes to use the + List/Map interfaces rather than concrete ArrayList/HashMap classes + internally. (Adrian Woodhead via acmurthy) + + HADOOP-2406. Add a benchmark for measuring read/write performance through + the InputFormat interface, particularly with compression. (cdouglas) + + HADOOP-2131. Allow finer-grained control over speculative-execution. Now + users can set it for maps and reduces independently. + Configuration changes to hadoop-default.xml: + deprecated mapred.speculative.execution + add mapred.map.tasks.speculative.execution + add mapred.reduce.tasks.speculative.execution + (Amareshwari Sri Ramadasu via acmurthy) + + HADOOP-1965. Interleave sort/spill in teh map-task along with calls to the + Mapper.map method. This is done by splitting the 'io.sort.mb' buffer into + two and using one half for collecting map-outputs and the other half for + sort/spill. (Amar Kamat via acmurthy) + + HADOOP-2464. 
Unit tests for chmod, chown, and chgrp using DFS. + (Raghu Angadi) + + HADOOP-1876. Persist statuses of completed jobs in HDFS so that the + JobClient can query and get information about decommissioned jobs and also + across JobTracker restarts. + Configuration changes to hadoop-default.xml: + add mapred.job.tracker.persist.jobstatus.active (default value of false) + add mapred.job.tracker.persist.jobstatus.hours (default value of 0) + add mapred.job.tracker.persist.jobstatus.dir (default value of + /jobtracker/jobsInfo) + (Alejandro Abdelnur via acmurthy) + + HADOOP-2077. Added version and build information to STARTUP_MSG for all + hadoop daemons to aid error-reporting, debugging etc. (acmurthy) + + HADOOP-2398. Additional instrumentation for NameNode and RPC server. + Add support for accessing instrumentation statistics via JMX. + (Sanjay radia via dhruba) + + HADOOP-2449. A return of the non-MR version of NNBench. + (Sanjay Radia via shv) + + HADOOP-1989. Remove 'datanodecluster' command from bin/hadoop. + (Sanjay Radia via shv) + + HADOOP-1742. Improve JavaDoc documentation for ClientProtocol, DFSClient, + and FSNamesystem. (Konstantin Shvachko) + + HADOOP-2298. Add Ant target for a binary-only distribution. + (Hrishikesh via nigel) + + HADOOP-2509. Add Ant target for Rat report (Apache license header + reports). (Hrishikesh via nigel) + + HADOOP-2469. WritableUtils.clone should take a Configuration + instead of a JobConf. (stack via omalley) + + HADOOP-2659. Introduce superuser permissions for admin operations. + (Tsz Wo (Nicholas), SZE via shv) + + HADOOP-2596. Added a SequenceFile.createWriter api which allows the user + to specify the blocksize, replication factor and the buffersize to be + used for the underlying HDFS file. (Alejandro Abdelnur via acmurthy) + + HADOOP-2431. Test HDFS File Permissions. (Hairong Kuang via shv) + + HADOOP-2232. Add an option to disable Nagle's algorithm in the IPC stack. + (Clint Morgan via cdouglas) + + HADOOP-2342. Created a micro-benchmark for measuring + local-file versus hdfs reads. (Owen O'Malley via nigel) + + HADOOP-2529. First version of HDFS User Guide. (Raghu Angadi) + + HADOOP-2690. Add jar-test target to build.xml, separating compilation + and packaging of the test classes. (Enis Soztutar via cdouglas) + + OPTIMIZATIONS + + HADOOP-1898. Release the lock protecting the last time of the last stack + dump while the dump is happening. (Amareshwari Sri Ramadasu via omalley) + + HADOOP-1900. Makes the heartbeat and task event queries interval + dependent on the cluster size. (Amareshwari Sri Ramadasu via ddas) + + HADOOP-2208. Counter update frequency (from TaskTracker to JobTracker) is + capped at 1 minute. (Amareshwari Sri Ramadasu via ddas) + + HADOOP-2284. Reduce the number of progress updates during the sorting in + the map task. (Amar Kamat via ddas) + + BUG FIXES + + HADOOP-2583. Fixes a bug in the Eclipse plug-in UI to edit locations. + Plug-in version is now synchronized with Hadoop version. + + HADOOP-2100. Remove faulty check for existence of $HADOOP_PID_DIR and let + 'mkdir -p' check & create it. (Michael Bieniosek via acmurthy) + + HADOOP-1642. Ensure jobids generated by LocalJobRunner are unique to + avoid collissions and hence job-failures. (Doug Cutting via acmurthy) + + HADOOP-2096. Close open file-descriptors held by streams while localizing + job.xml in the JobTracker and while displaying it on the webui in + jobconf.jsp. (Amar Kamat via acmurthy) + + HADOOP-2098. 
Log start & completion of empty jobs to JobHistory, which + also ensures that we close the file-descriptor of the job's history log + opened during job-submission. (Amar Kamat via acmurthy) + + HADOOP-2112. Adding back changes to build.xml lost while reverting + HADOOP-1622 i.e. http://svn.apache.org/viewvc?view=rev&revision=588771. + (acmurthy) + + HADOOP-2089. Fixes the command line argument handling to handle multiple + -cacheArchive in Hadoop streaming. (Lohit Vijayarenu via ddas) + + HADOOP-2071. Fix StreamXmlRecordReader to use a BufferedInputStream + wrapped over the DFSInputStream since mark/reset aren't supported by + DFSInputStream anymore. (Lohit Vijayarenu via acmurthy) + + HADOOP-1348. Allow XML comments inside configuration files. + (Rajagopal Natarajan and Enis Soztutar via enis) + + HADOOP-1952. Improve handling of invalid, user-specified classes while + configuring streaming jobs such as combiner, input/output formats etc. + Now invalid options are caught, logged and jobs are failed early. (Lohit + Vijayarenu via acmurthy) + + HADOOP-2151. FileSystem.globPaths validates the list of Paths that + it returns. (Lohit Vijayarenu via dhruba) + + HADOOP-2121. Cleanup DFSOutputStream when the stream encountered errors + when Datanodes became full. (Raghu Angadi via dhruba) + + HADOOP-1130. The FileSystem.closeAll() method closes all existing + DFSClients. (Chris Douglas via dhruba) + + HADOOP-2204. DFSTestUtil.waitReplication was not waiting for all replicas + to get created, thus causing unit test failure. + (Raghu Angadi via dhruba) + + HADOOP-2078. An zero size file may have no blocks associated with it. + (Konstantin Shvachko via dhruba) + + HADOOP-2212. ChecksumFileSystem.getSumBufferSize might throw + java.lang.ArithmeticException. The fix is to initialize bytesPerChecksum + to 0. (Michael Bieniosek via ddas) + + HADOOP-2216. Fix jobtasks.jsp to ensure that it first collects the + taskids which satisfy the filtering criteria and then use that list to + print out only the required task-reports, previously it was oblivious to + the filtering and hence used the wrong index into the array of task-reports. + (Amar Kamat via acmurthy) + + HADOOP-2272. Fix findbugs target to reflect changes made to the location + of the streaming jar file by HADOOP-2207. (Adrian Woodhead via nigel) + + HADOOP-2244. Fixes the MapWritable.readFields to clear the instance + field variable every time readFields is called. (Michael Stack via ddas). + + HADOOP-2245. Fixes LocalJobRunner to include a jobId in the mapId. Also, + adds a testcase for JobControl. (Adrian Woodhead via ddas). + + HADOOP-2275. Fix erroneous detection of corrupted file when namenode + fails to allocate any datanodes for newly allocated block. + (Dhruba Borthakur via dhruba) + + HADOOP-2256. Fix a buf in the namenode that could cause it to encounter + an infinite loop while deleting excess replicas that were created by + block rebalancing. (Hairong Kuang via dhruba) + + HADOOP-2209. SecondaryNamenode process exits if it encounters exceptions + that it cannot handle. (Dhruba Borthakur via dhruba) + + HADOOP-2314. Prevent TestBlockReplacement from occasionally getting + into an infinite loop. (Hairong Kuang via dhruba) + + HADOOP-2300. This fixes a bug where mapred.tasktracker.tasks.maximum + would be ignored even if it was set in hadoop-site.xml. + (Amareshwari Sri Ramadasu via ddas) + + HADOOP-2349. Improve code layout in file system transaction logging code. + (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2368. 
Fix unit tests on Windows.
+ (Tsz Wo (Nicholas), SZE via dhruba)
+
+ HADOOP-2363. This fix allows running multiple instances of the unit test
+ in parallel. The bug was introduced in HADOOP-2185 that changed
+ port-rolling behaviour. (Konstantin Shvachko via dhruba)
+
+ HADOOP-2271. Fix chmod task to be non-parallel. (Adrian Woodhead via
+ omalley)
+
+ HADOOP-2313. Fail the build if building libhdfs fails. (nigel via omalley)
+
+ HADOOP-2359. Remove warning for interrupted exception when closing down
+ minidfs. (dhruba via omalley)
+
+ HADOOP-1841. Prevent slow clients from consuming threads in the NameNode.
+ (dhruba)
+
+ HADOOP-2323. JobTracker.close() should not print stack traces for
+ normal exit. (jimk via cutting)
+
+ HADOOP-2376. Prevents sort example from overriding the number of maps.
+ (Owen O'Malley via ddas)
+
+ HADOOP-2434. FSDatasetInterface read interface causes HDFS reads to occur
+ in 1 byte chunks, causing performance degradation.
+ (Raghu Angadi via dhruba)
+
+ HADOOP-2459. Fix package target so that src/docs/build files are not
+ included in the release. (nigel)
+
+ HADOOP-2215. Fix documentation in cluster_setup.html &
+ mapred_tutorial.html to reflect that mapred.tasktracker.tasks.maximum has
+ been superseded by mapred.tasktracker.{map|reduce}.tasks.maximum.
+ (Amareshwari Sri Ramadasu via acmurthy)
+
+ HADOOP-2352. Remove AC_CHECK_LIB for libz and liblzo to ensure that
+ libhadoop.so doesn't have a dependency on them. (acmurthy)
+
+ HADOOP-2453. Fix the configuration for wordcount-simple example in Hadoop
+ Pipes which currently produces an XML parsing error. (Amareshwari Sri
+ Ramadasu via acmurthy)
+
+ HADOOP-2476. Unit test failure while reading permission bits of local
+ file system (on Windows) fixed. (Raghu Angadi via dhruba)
+
+ HADOOP-2247. Fine-tune the strategies for killing mappers and reducers
+ due to failures while fetching map-outputs. Now the map-completion times
+ and number of currently running reduces are taken into account by the
+ JobTracker before killing the mappers, while the progress made by the
+ reducer and the number of fetch-failures vis-a-vis total number of
+ fetch-attempts are taken into account before the reducer kills itself.
+ (Amar Kamat via acmurthy)
+
+ HADOOP-2452. Fix eclipse plug-in build.xml to refer to the right
+ location where hadoop-*-core.jar is generated. (taton)
+
+ HADOOP-2492. Additional debugging in the rpc server to better
+ diagnose ConcurrentModificationException. (dhruba)
+
+ HADOOP-2344. Enhance the utility for executing shell commands to read the
+ stdout/stderr streams while waiting for the command to finish (to free up
+ the buffers). Also, this patch throws away stderr of the DF utility.
+ @deprecated
+ org.apache.hadoop.fs.ShellCommand for org.apache.hadoop.util.Shell
+ org.apache.hadoop.util.ShellUtil for
+ org.apache.hadoop.util.Shell.ShellCommandExecutor
+ (Amar Kamat via acmurthy)
+
+ HADOOP-2511. Fix a javadoc warning in org.apache.hadoop.util.Shell
+ introduced by HADOOP-2344. (acmurthy)
+
+ HADOOP-2442. Fix TestLocalFileSystemPermission.testLocalFSsetOwner
+ to work on more platforms. (Raghu Angadi via nigel)
+
+ HADOOP-2488. Fix a regression in random read performance.
+ (Michael Stack via rangadi)
+
+ HADOOP-2523. Fix TestDFSShell.testFilePermissions on Windows.
+ (Raghu Angadi via nigel)
+
+ HADOOP-2535.
Removed support for deprecated mapred.child.heap.size and + fixed some indentation issues in TaskRunner. (acmurthy) + Configuration changes to hadoop-default.xml: + remove mapred.child.heap.size + + HADOOP-2512. Fix error stream handling in Shell. Use exit code to + detect shell command errors in RawLocalFileSystem. (Raghu Angadi) + + HADOOP-2446. Fixes TestHDFSServerPorts and TestMRServerPorts so they + do not rely on statically configured ports and cleanup better. (nigel) + + HADOOP-2537. Make build process compatible with Ant 1.7.0. + (Hrishikesh via nigel) + + HADOOP-1281. Ensure running tasks of completed map TIPs (e.g. speculative + tasks) are killed as soon as the TIP completed. (acmurthy) + + HADOOP-2571. Suppress a suprious warning in test code. (cdouglas) + + HADOOP-2481. NNBench report its progress periodically. + (Hairong Kuang via dhruba) + + HADOOP-2601. Start name-node on a free port for TestNNThroughputBenchmark. + (Konstantin Shvachko) + + HADOOP-2494. Set +x on contrib/*/bin/* in packaged tar bundle. + (stack via tomwhite) + + HADOOP-2605. Remove bogus leading slash in task-tracker report bindAddress. + (Konstantin Shvachko) + + HADOOP-2620. Trivial. 'bin/hadoop fs -help' did not list chmod, chown, and + chgrp. (Raghu Angadi) + + HADOOP-2614. The DFS WebUI accesses are configured to be from the user + specified by dfs.web.ugi. (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2543. Implement a "no-permission-checking" mode for smooth + upgrade from a pre-0.16 install of HDFS. + (Hairong Kuang via dhruba) + + HADOOP-290. A DataNode log message now prints the target of a replication + request correctly. (dhruba) + + HADOOP-2538. Redirect to a warning, if plaintext parameter is true but + the filter parameter is not given in TaskLogServlet. + (Michael Bieniosek via enis) + + HADOOP-2582. Prevent 'bin/hadoop fs -copyToLocal' from creating + zero-length files when the src does not exist. + (Lohit Vijayarenu via cdouglas) + + HADOOP-2189. Incrementing user counters should count as progress. (ddas) + + HADOOP-2649. The NameNode periodically computes replication work for + the datanodes. The periodicity of this computation is now configurable. + (dhruba) + + HADOOP-2549. Correct disk size computation so that data-nodes could switch + to other local drives if current is full. (Hairong Kuang via shv) + + HADOOP-2633. Fsck should call name-node methods directly rather than + through rpc. (Tsz Wo (Nicholas), SZE via shv) + + HADOOP-2687. Modify a few log message generated by dfs client to be + logged only at INFO level. (stack via dhruba) + + HADOOP-2402. Fix BlockCompressorStream to ensure it buffers data before + sending it down to the compressor so that each write call doesn't + compress. (Chris Douglas via acmurthy) + + HADOOP-2645. The Metrics initialization code does not throw + exceptions when servers are restarted by MiniDFSCluster. + (Sanjay Radia via dhruba) + + HADOOP-2691. Fix a race condition that was causing the DFSClient + to erroneously remove a good datanode from a pipeline that actually + had another datanode that was bad. (dhruba) + + HADOOP-1195. All code in FSNamesystem checks the return value + of getDataNode for null before using it. (dhruba) + + HADOOP-2640. Fix a bug in MultiFileSplitInputFormat that was always + returning 1 split in some circumstances. (Enis Soztutar via nigel) + + HADOOP-2626. Fix paths with special characters to work correctly + with the local filesystem. (Thomas Friol via cutting) + + HADOOP-2646. 
Fix SortValidator to work with fully-qualified + working directories. (Arun C Murthy via nigel) + + HADOOP-2092. Added a ping mechanism to the pipes' task to periodically + check if the parent Java task is running, and exit if the parent isn't + alive and responding. (Amareshwari Sri Ramadasu via acmurthy) + + HADOOP-2714. TestDecommission failed on windows because the replication + request was timing out. (dhruba) + + HADOOP-2576. Namenode performance degradation over time triggered by + large heartbeat interval. (Raghu Angadi) + + HADOOP-2713. TestDatanodeDeath failed on windows because the replication + request was timing out. (dhruba) + + HADOOP-2639. Fixes a problem to do with incorrect maintenance of values + for runningMapTasks/runningReduceTasks. (Amar Kamat and Arun Murthy + via ddas) + + HADOOP-2723. Fixed the check for checking whether to do user task + profiling. (Amareshwari Sri Ramadasu via omalley) + + HADOOP-2734. Link forrest docs to new http://hadoop.apache.org + (Doug Cutting via nigel) + + HADOOP-2641. Added Apache license headers to 95 files. (nigel) + + HADOOP-2732. Fix bug in path globbing. (Hairong Kuang via nigel) + + HADOOP-2404. Fix backwards compatability with hadoop-0.15 configuration + files that was broken by HADOOP-2185. (omalley) + + HADOOP-2755. Fix fsck performance degradation because of permissions + issue. (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-2768. Fix performance regression caused by HADOOP-1707. + (dhruba borthakur via nigel) + + HADOOP-3108. Fix NPE in setPermission and setOwner. (shv) + +Release 0.15.3 - 2008-01-18 + + BUG FIXES + + HADOOP-2562. globPaths supports {ab,cd}. (Hairong Kuang via dhruba) + + HADOOP-2540. fsck reports missing blocks incorrectly. (dhruba) + + HADOOP-2570. "work" directory created unconditionally, and symlinks + created from the task cwds. + + HADOOP-2574. Fixed mapred_tutorial.xml to correct minor errors with the + WordCount examples. (acmurthy) + +Release 0.15.2 - 2008-01-02 + + BUG FIXES + + HADOOP-2246. Moved the changelog for HADOOP-1851 from the NEW FEATURES + section to the INCOMPATIBLE CHANGES section. (acmurthy) + + HADOOP-2238. Fix TaskGraphServlet so that it sets the content type of + the response appropriately. (Paul Saab via enis) + + HADOOP-2129. Fix so that distcp works correctly when source is + HDFS but not the default filesystem. HDFS paths returned by the + listStatus() method are now fully-qualified. (cutting) + + HADOOP-2378. Fixes a problem where the last task completion event would + get created after the job completes. (Alejandro Abdelnur via ddas) + + HADOOP-2228. Checks whether a job with a certain jobId is already running + and then tries to create the JobInProgress object. + (Johan Oskarsson via ddas) + + HADOOP-2422. dfs -cat multiple files fail with 'Unable to write to + output stream'. (Raghu Angadi via dhruba) + + HADOOP-2460. When the namenode encounters ioerrors on writing a + transaction log, it stops writing new transactions to that one. + (Raghu Angadi via dhruba) + + HADOOP-2227. Use the LocalDirAllocator uniformly for handling all of the + temporary storage required for a given task. It also implies that + mapred.local.dir.minspacestart is handled by checking if there is enough + free-space on any one of the available disks. (Amareshwari Sri Ramadasu + via acmurthy) + + HADOOP-2437. Fix the LocalDirAllocator to choose the seed for the + round-robin disk selections randomly. This helps in spreading data across + multiple partitions much better. (acmurhty) + + HADOOP-2486. 
When the list of files from the InMemoryFileSystem is obtained + for merging, this patch will ensure that only those files whose checksums + have also got created (renamed) are returned. (ddas) + + HADOOP-2456. Hardcode English locale to prevent NumberFormatException + from occurring when starting the NameNode with certain locales. + (Matthias Friedrich via nigel) + + IMPROVEMENTS + + HADOOP-2160. Remove project-level, non-user documentation from + releases, since it's now maintained in a separate tree. (cutting) + + HADOOP-1327. Add user documentation for streaming. (cutting) + + HADOOP-2382. Add hadoop-default.html to subversion. (cutting) + + HADOOP-2158. hdfsListDirectory calls FileSystem.listStatus instead + of FileSystem.listPaths. This reduces the number of RPC calls on the + namenode, thereby improving scalability. (Christian Kunz via dhruba) + +Release 0.15.1 - 2007-11-27 + + INCOMPATIBLE CHANGES + + HADOOP-713. Reduce CPU usage on namenode while listing directories. + FileSystem.listPaths does not return the size of the entire subtree. + Introduced a new API ClientProtocol.getContentLength that returns the + size of the subtree. (Dhruba Borthakur via dhruba) + + IMPROVEMENTS + + HADOOP-1917. Addition of guides/tutorial for better overall + documentation for Hadoop. Specifically: + * quickstart.html is targetted towards first-time users and helps them + setup a single-node cluster and play with Hadoop. + * cluster_setup.html helps admins to configure and setup non-trivial + hadoop clusters. + * mapred_tutorial.html is a comprehensive Map-Reduce tutorial. + (acmurthy) + + BUG FIXES + + HADOOP-2174. Removed the unnecessary Reporter.setStatus call from + FSCopyFilesMapper.close which led to a NPE since the reporter isn't valid + in the close method. (Chris Douglas via acmurthy) + + HADOOP-2172. Restore performance of random access to local files + by caching positions of local input streams, avoiding a system + call. (cutting) + + HADOOP-2205. Regenerate the Hadoop website since some of the changes made + by HADOOP-1917 weren't correctly copied over to the trunk/docs directory. + Also fixed a couple of minor typos and broken links. (acmurthy) + +Release 0.15.0 - 2007-11-2 + + INCOMPATIBLE CHANGES + + HADOOP-1708. Make files appear in namespace as soon as they are + created. (Dhruba Borthakur via dhruba) + + HADOOP-999. A HDFS Client immediately informs the NameNode of a new + file creation. ClientProtocol version changed from 14 to 15. + (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-932. File locking interfaces and implementations (that were + earlier deprecated) are removed. Client Protocol version changed + from 15 to 16. (Raghu Angadi via dhruba) + + HADOOP-1621. FileStatus is now a concrete class and FileSystem.listPaths + is deprecated and replaced with listStatus. (Chris Douglas via omalley) + + HADOOP-1656. The blockSize of a file is stored persistently in the file + inode. (Dhruba Borthakur via dhruba) + + HADOOP-1838. The blocksize of files created with an earlier release is + set to the default block size. (Dhruba Borthakur via dhruba) + + HADOOP-785. Add support for 'final' Configuration parameters, + removing support for 'mapred-default.xml', and changing + 'hadoop-site.xml' to not override other files. Now folks should + generally use 'hadoop-site.xml' for all configurations. Values + with a 'final' tag may not be overridden by subsequently loaded + configuration files, e.g., by jobs. (Arun C. Murthy via cutting) + + HADOOP-1846. 
DatanodeReport in ClientProtocol can report live + datanodes, dead datanodes or all datanodes. Client Protocol version + changed from 17 to 18. (Hairong Kuang via dhruba) + + HADOOP-1851. Permit specification of map output compression type + and codec, independent of the final output's compression + parameters. (Arun C Murthy via cutting) + + HADOOP-1819. Jobtracker cleanups, including binding ports before + clearing state directories, so that inadvertently starting a + second jobtracker doesn't trash one that's already running. Removed + method JobTracker.getTracker() because the static variable, which + stored the value caused initialization problems. + (omalley via cutting) + + NEW FEATURES + + HADOOP-89. A client can access file data even before the creator + has closed the file. Introduce a new command "tail" from dfs shell. + (Dhruba Borthakur via dhruba) + + HADOOP-1636. Allow configuration of the number of jobs kept in + memory by the JobTracker. (Michael Bieniosek via omalley) + + HADOOP-1667. Reorganize CHANGES.txt into sections to make it + easier to read. Also remove numbering, to make merging easier. + (cutting) + + HADOOP-1610. Add metrics for failed tasks. + (Devaraj Das via tomwhite) + + HADOOP-1767. Add "bin/hadoop job -list" sub-command. (taton via cutting) + + HADOOP-1351. Add "bin/hadoop job [-fail-task|-kill-task]" sub-commands + to terminate a particular task-attempt. (Enis Soztutar via acmurthy) + + HADOOP-1880. SleepJob : An example job that sleeps at each map and + reduce task. (enis) + + HADOOP-1809. Add a link in web site to #hadoop IRC channel. (enis) + + HADOOP-1894. Add percentage graphs and mapred task completion graphs + to Web User Interface. Users not using Firefox may install a plugin to + their browsers to see svg graphics. (enis) + + HADOOP-1914. Introduce a new NamenodeProtocol to allow secondary + namenodes and rebalancing processes to communicate with a primary + namenode. (Hairong Kuang via dhruba) + + HADOOP-1963. Add a FileSystem implementation for the Kosmos + Filesystem (KFS). (Sriram Rao via cutting) + + HADOOP-1822. Allow the specialization and configuration of socket + factories. Provide a StandardSocketFactory, and a SocksSocketFactory to + allow the use of SOCKS proxies. (taton). + + HADOOP-1968. FileSystem supports wildcard input syntax "{ }". + (Hairong Kuang via dhruba) + + HADOOP-2566. Add globStatus method to the FileSystem interface + and deprecate globPath and listPath. (Hairong Kuang via hairong) + + OPTIMIZATIONS + + HADOOP-1910. Reduce the number of RPCs that DistributedFileSystem.create() + makes to the namenode. (Raghu Angadi via dhruba) + + HADOOP-1565. Reduce memory usage of NameNode by replacing + TreeMap in HDFS Namespace with ArrayList. + (Dhruba Borthakur via dhruba) + + HADOOP-1743. Change DFS INode from a nested class to standalone + class, with specialized subclasses for directories and files, to + save memory on the namenode. (Konstantin Shvachko via cutting) + + HADOOP-1759. Change file name in INode from String to byte[], + saving memory on the namenode. (Konstantin Shvachko via cutting) + + HADOOP-1766. Save memory in namenode by having BlockInfo extend + Block, and replace many uses of Block with BlockInfo. + (Konstantin Shvachko via cutting) + + HADOOP-1687. Save memory in namenode by optimizing BlockMap + representation. (Konstantin Shvachko via cutting) + + HADOOP-1774. Remove use of INode.parent in Block CRC upgrade. + (Raghu Angadi via dhruba) + + HADOOP-1788. 
Increase the buffer size on the Pipes command socket. + (Amareshwari Sri Ramadasu and Christian Kunz via omalley) + + BUG FIXES + + HADOOP-1946. The Datanode code does not need to invoke du on + every heartbeat. (Hairong Kuang via dhruba) + + HADOOP-1935. Fix a NullPointerException in internalReleaseCreate. + (Dhruba Borthakur) + + HADOOP-1933. The nodes listed in include and exclude files + are always listed in the datanode report. + (Raghu Angadi via dhruba) + + HADOOP-1953. The job tracker should wait beteween calls to try and delete + the system directory (Owen O'Malley via devaraj) + + HADOOP-1932. TestFileCreation fails with message saying filestatus.dat + is of incorrect size. (Dhruba Borthakur via dhruba) + + HADOOP-1573. Support for 0 reducers in PIPES. + (Owen O'Malley via devaraj) + + HADOOP-1500. Fix typographical errors in the DFS WebUI. + (Nigel Daley via dhruba) + + HADOOP-1076. Periodic checkpoint can continue even if an earlier + checkpoint encountered an error. (Dhruba Borthakur via dhruba) + + HADOOP-1887. The Namenode encounters an ArrayIndexOutOfBoundsException + while listing a directory that had a file that was + being actively written to. (Dhruba Borthakur via dhruba) + + HADOOP-1904. The Namenode encounters an exception because the + list of blocks per datanode-descriptor was corrupted. + (Konstantin Shvachko via dhruba) + + HADOOP-1762. The Namenode fsimage does not contain a list of + Datanodes. (Raghu Angadi via dhruba) + + HADOOP-1890. Removed debugging prints introduced by HADOOP-1774. + (Raghu Angadi via dhruba) + + HADOOP-1763. Too many lost task trackers on large clusters due to + insufficient number of RPC handler threads on the JobTracker. + (Devaraj Das) + + HADOOP-1463. HDFS report correct usage statistics for disk space + used by HDFS. (Hairong Kuang via dhruba) + + HADOOP-1692. In DFS ant task, don't cache the Configuration. + (Chris Douglas via cutting) + + HADOOP-1726. Remove lib/jetty-ext/ant.jar. (omalley) + + HADOOP-1772. Fix hadoop-daemon.sh script to get correct hostname + under Cygwin. (Tsz Wo (Nicholas), SZE via cutting) + + HADOOP-1749. Change TestDFSUpgrade to sort files, fixing sporadic + test failures. (Enis Soztutar via cutting) + + HADOOP-1748. Fix tasktracker to be able to launch tasks when log + directory is relative. (omalley via cutting) + + HADOOP-1775. Fix a NullPointerException and an + IllegalArgumentException in MapWritable. + (Jim Kellerman via cutting) + + HADOOP-1795. Fix so that jobs can generate output file names with + special characters. (Fr??d??ric Bertin via cutting) + + HADOOP-1810. Fix incorrect value type in MRBench (SmallJobs) + (Devaraj Das via tomwhite) + + HADOOP-1806. Fix ant task to compile again, also fix default + builds to compile ant tasks. (Chris Douglas via cutting) + + HADOOP-1758. Fix escape processing in librecordio to not be + quadratic. (Vivek Ratan via cutting) + + HADOOP-1817. Fix MultiFileSplit to read and write the split + length, so that it is not always zero in map tasks. + (Thomas Friol via cutting) + + HADOOP-1853. Fix contrib/streaming to accept multiple -cacheFile + options. (Prachi Gupta via cutting) + + HADOOP-1818. Fix MultiFileInputFormat so that it does not return + empty splits when numPaths < numSplits. (Thomas Friol via enis) + + HADOOP-1840. Fix race condition which leads to task's diagnostic + messages getting lost. (acmurthy) + + HADOOP-1885. Fix race condition in MiniDFSCluster shutdown. + (Chris Douglas via nigel) + + HADOOP-1889. 
Fix path in EC2 scripts for building your own AMI. + (tomwhite) + + HADOOP-1892. Fix a NullPointerException in the JobTracker when + trying to fetch a task's diagnostic messages from the JobClient. + (Amar Kamat via acmurthy) + + HADOOP-1897. Completely remove about.html page from the web site. + (enis) + + HADOOP-1907. Fix null pointer exception when getting task diagnostics + in JobClient. (Christian Kunz via omalley) + + HADOOP-1882. Remove spurious asterisks from decimal number displays. + (Raghu Angadi via cutting) + + HADOOP-1783. Make S3 FileSystem return Paths fully-qualified with + scheme and host. (tomwhite) + + HADOOP-1925. Make pipes' autoconf script look for libsocket and libnsl, so + that it can compile under Solaris. (omalley) + + HADOOP-1940. TestDFSUpgradeFromImage must shut down its MiniDFSCluster. + (Chris Douglas via nigel) + + HADOOP-1930. Fix the blame for failed fetchs on the right host. (Arun C. + Murthy via omalley) + + HADOOP-1934. Fix the platform name on Mac to use underscores rather than + spaces. (omalley) + + HADOOP-1959. Use "/" instead of File.separator in the StatusHttpServer. + (jimk via omalley) + + HADOOP-1626. Improve dfsadmin help messages. + (Lohit Vijayarenu via dhruba) + + HADOOP-1695. The SecondaryNamenode waits for the Primary NameNode to + start up. (Dhruba Borthakur) + + HADOOP-1983. Have Pipes flush the command socket when progress is sent + to prevent timeouts during long computations. (omalley) + + HADOOP-1875. Non-existant directories or read-only directories are + filtered from dfs.client.buffer.dir. (Hairong Kuang via dhruba) + + HADOOP-1992. Fix the performance degradation in the sort validator. + (acmurthy via omalley) + + HADOOP-1874. Move task-outputs' promotion/discard to a separate thread + distinct from the main heartbeat-processing thread. The main upside being + that we do not lock-up the JobTracker during HDFS operations, which + otherwise may lead to lost tasktrackers if the NameNode is unresponsive. + (Devaraj Das via acmurthy) + + HADOOP-2026. Namenode prints out one log line for "Number of transactions" + at most once every minute. (Dhruba Borthakur) + + HADOOP-2022. Ensure that status information for successful tasks is correctly + recorded at the JobTracker, so that, for example, one may view correct + information via taskdetails.jsp. This bug was introduced by HADOOP-1874. + (Amar Kamat via acmurthy) + + HADOOP-2031. Correctly maintain the taskid which takes the TIP to + completion, failing which the case of lost tasktrackers isn't handled + properly i.e. the map TIP is incorrectly left marked as 'complete' and it + is never rescheduled elsewhere, leading to hung reduces. + (Devaraj Das via acmurthy) + + HADOOP-2018. The source datanode of a data transfer waits for + a response from the target datanode before closing the data stream. + (Hairong Kuang via dhruba) + + HADOOP-2023. Disable TestLocalDirAllocator on Windows. + (Hairong Kuang via nigel) + + HADOOP-2016. Ignore status-updates from FAILED/KILLED tasks at the + TaskTracker. This fixes a race-condition which caused the tasks to wrongly + remain in the RUNNING state even after being killed by the JobTracker and + thus handicap the cleanup of the task's output sub-directory. (acmurthy) + + HADOOP-1771. Fix a NullPointerException in streaming caused by an + IOException in MROutputThread. (lohit vijayarenu via nigel) + + HADOOP-2028. Fix distcp so that the log dir does not need to be + specified and the destination does not need to exist. 
+ (Chris Douglas via nigel) + + HADOOP-2044. The namenode protects all lease manipulations using a + sortedLease lock. (Dhruba Borthakur) + + HADOOP-2051. The TaskCommit thread should not die for exceptions other + than the InterruptedException. This behavior is there for the other long + running threads in the JobTracker. (Arun C Murthy via ddas) + + HADOOP-1973. The FileSystem object would be accessed on the JobTracker + through a RPC in the InterTrackerProtocol. The check for the object being + null was missing and hence NPE would be thrown sometimes. This issue fixes + that problem. (Amareshwari Sri Ramadasu via ddas) + + HADOOP-2033. The SequenceFile.Writer.sync method was a no-op, which caused + very uneven splits for applications like distcp that count on them. + (omalley) + + HADOOP-2070. Added a flush method to pipes' DownwardProtocol and call + that before waiting for the application to finish to ensure all buffered + data is flushed. (Owen O'Malley via acmurthy) + + HADOOP-2080. Fixed calculation of the checksum file size when the values + are large. (omalley) + + HADOOP-2048. Change error handling in distcp so that each map copies + as much as possible before reporting the error. Also report progress on + every copy. (Chris Douglas via omalley) + + HADOOP-2073. Change size of VERSION file after writing contents to it. + (Konstantin Shvachko via dhruba) + + HADOOP-2102. Fix the deprecated ToolBase to pass its Configuration object + to the superceding ToolRunner to ensure it picks up the appropriate + configuration resources. (Dennis Kubes and Enis Soztutar via acmurthy) + + HADOOP-2103. Fix minor javadoc bugs introduce by HADOOP-2046. (Nigel + Daley via acmurthy) + + IMPROVEMENTS + + HADOOP-1908. Restructure data node code so that block sending and + receiving are seperated from data transfer header handling. + (Hairong Kuang via dhruba) + + HADOOP-1921. Save the configuration of completed/failed jobs and make them + available via the web-ui. (Amar Kamat via devaraj) + + HADOOP-1266. Remove dependency of package org.apache.hadoop.net on + org.apache.hadoop.dfs. (Hairong Kuang via dhruba) + + HADOOP-1779. Replace INodeDirectory.getINode() by a getExistingPathINodes() + to allow the retrieval of all existing INodes along a given path in a + single lookup. This facilitates removal of the 'parent' field in the + inode. (Christophe Taton via dhruba) + + HADOOP-1756. Add toString() to some Writable-s. (ab) + + HADOOP-1727. New classes: MapWritable and SortedMapWritable. + (Jim Kellerman via ab) + + HADOOP-1651. Improve progress reporting. + (Devaraj Das via tomwhite) + + HADOOP-1595. dfsshell can wait for a file to achieve its intended + replication target. (Tsz Wo (Nicholas), SZE via dhruba) + + HADOOP-1693. Remove un-needed log fields in DFS replication classes, + since the log may be accessed statically. (Konstantin Shvachko via cutting) + + HADOOP-1231. Add generics to Mapper and Reducer interfaces. + (tomwhite via cutting) + + HADOOP-1436. Improved command-line APIs, so that all tools need + not subclass ToolBase, and generic parameter parser is public. + (Enis Soztutar via cutting) + + HADOOP-1703. DFS-internal code cleanups, removing several uses of + the obsolete UTF8. (Christophe Taton via cutting) + + HADOOP-1731. Add Hadoop's version to contrib jar file names. + (cutting) + + HADOOP-1689. Make shell scripts more portable. All shell scripts + now explicitly depend on bash, but do not require that bash be + installed in a particular location, as long as it is on $PATH. 
+ (cutting) + + HADOOP-1744. Remove many uses of the deprecated UTF8 class from + the HDFS namenode. (Christophe Taton via cutting) + + HADOOP-1654. Add IOUtils class, containing generic io-related + utility methods. (Enis Soztutar via cutting) + + HADOOP-1158. Change JobTracker to record map-output transmission + errors and use them to trigger speculative re-execution of tasks. + (Arun C Murthy via cutting) + + HADOOP-1601. Change GenericWritable to use ReflectionUtils for + instance creation, avoiding classloader issues, and to implement + Configurable. (Enis Soztutar via cutting) + + HADOOP-1750. Log standard output and standard error when forking + task processes. (omalley via cutting) + + HADOOP-1803. Generalize build.xml to make files in all + src/contrib/*/bin directories executable. (stack via cutting) + + HADOOP-1739. Let OS always choose the tasktracker's umbilical + port. Also switch default address for umbilical connections to + loopback. (cutting) + + HADOOP-1812. Let OS choose ports for IPC and RPC unit tests. (cutting) + + HADOOP-1825. Create $HADOOP_PID_DIR when it does not exist. + (Michael Bieniosek via cutting) + + HADOOP-1425. Replace uses of ToolBase with the Tool interface. + (Enis Soztutar via cutting) + + HADOOP-1569. Reimplement DistCP to use the standard FileSystem/URI + code in Hadoop so that you can copy from and to all of the supported file + systems.(Chris Douglas via omalley) + + HADOOP-1018. Improve documentation w.r.t handling of lost hearbeats between + TaskTrackers and JobTracker. (acmurthy) + + HADOOP-1718. Add ant targets for measuring code coverage with clover. + (simonwillnauer via nigel) + + HADOOP-1592. Log error messages to the client console when tasks + fail. (Amar Kamat via cutting) + + HADOOP-1879. Remove some unneeded casts. (Nilay Vaish via cutting) + + HADOOP-1878. Add space between priority links on job details + page. (Thomas Friol via cutting) + + HADOOP-120. In ArrayWritable, prevent creation with null value + class, and improve documentation. (Cameron Pope via cutting) + + HADOOP-1926. Add a random text writer example/benchmark so that we can + benchmark compression codecs on random data. (acmurthy via omalley) + + HADOOP-1906. Warn the user if they have an obsolete madred-default.xml + file in their configuration directory. (acmurthy via omalley) + + HADOOP-1971. Warn when job does not specify a jar. (enis via cutting) + + HADOOP-1942. Increase the concurrency of transaction logging to + edits log. Reduce the number of syncs by double-buffering the changes + to the transaction log. (Dhruba Borthakur) + + HADOOP-2046. Improve mapred javadoc. (Arun C. Murthy via cutting) + + HADOOP-2105. Improve overview.html to clarify supported platforms, + software pre-requisites for hadoop, how to install them on various + platforms and a better general description of hadoop and it's utility. + (Jim Kellerman via acmurthy) + + +Release 0.14.4 - 2007-11-26 + + BUG FIXES + + HADOOP-2140. Add missing Apache Licensing text at the front of several + C and C++ files. + + HADOOP-2169. Fix the DT_SONAME field of libhdfs.so to set it to the + correct value of 'libhdfs.so', currently it is set to the absolute path of + libhdfs.so. (acmurthy) + + HADOOP-2001. Make the job priority updates and job kills synchronized on + the JobTracker. Deadlock was seen in the JobTracker because of the lack of + this synchronization. (Arun C Murthy via ddas) + + +Release 0.14.3 - 2007-10-19 + + BUG FIXES + + HADOOP-2053. 
Fixed a dangling reference to a memory buffer in the map + output sorter. (acmurthy via omalley) + + HADOOP-2036. Fix a NullPointerException in JvmMetrics class. (nigel) + + HADOOP-2043. Release 0.14.2 was compiled with Java 1.6 rather than + Java 1.5. (cutting) + + +Release 0.14.2 - 2007-10-09 + + BUG FIXES + + HADOOP-1948. Removed spurious error message during block crc upgrade. + (Raghu Angadi via dhruba) + + HADOOP-1862. reduces are getting stuck trying to find map outputs. + (Arun C. Murthy via ddas) + + HADOOP-1977. Fixed handling of ToolBase cli options in JobClient. + (enis via omalley) + + HADOOP-1972. Fix LzoCompressor to ensure the user has actually asked + to finish compression. (arun via omalley) + + HADOOP-1970. Fix deadlock in progress reporting in the task. (Vivek + Ratan via omalley) + + HADOOP-1978. Name-node removes edits.new after a successful startup. + (Konstantin Shvachko via dhruba) + + HADOOP-1955. The Namenode tries to not pick the same source Datanode for + a replication request if the earlier replication request for the same + block and that source Datanode had failed. + (Raghu Angadi via dhruba) + + HADOOP-1961. The -get option to dfs-shell works when a single filename + is specified. (Raghu Angadi via dhruba) + + HADOOP-1997. TestCheckpoint closes the edits file after writing to it, + otherwise the rename of this file on Windows fails. + (Konstantin Shvachko via dhruba) + +Release 0.14.1 - 2007-09-04 + + BUG FIXES + + HADOOP-1740. Fix null pointer exception in sorting map outputs. (Devaraj + Das via omalley) + + HADOOP-1790. Fix tasktracker to work correctly on multi-homed + boxes. (Torsten Curdt via cutting) + + HADOOP-1798. Fix jobtracker to correctly account for failed + tasks. (omalley via cutting) + + +Release 0.14.0 - 2007-08-17 + + INCOMPATIBLE CHANGES + + 1. HADOOP-1134. + CONFIG/API - dfs.block.size must now be a multiple of + io.byte.per.checksum, otherwise new files can not be written. + LAYOUT - DFS layout version changed from -6 to -7, which will require an + upgrade from previous versions. + PROTOCOL - Datanode RPC protocol version changed from 7 to 8. + + 2. HADOOP-1283 + API - deprecated file locking API. + + 3. HADOOP-894 + PROTOCOL - changed ClientProtocol to fetch parts of block locations. + + 4. HADOOP-1336 + CONFIG - Enable speculative execution by default. + + 5. HADOOP-1197 + API - deprecated method for Configuration.getObject, because + Configurations should only contain strings. + + 6. HADOOP-1343 + API - deprecate Configuration.set(String,Object) so that only strings are + put in Configrations. + + 7. HADOOP-1207 + CLI - Fix FsShell 'rm' command to continue when a non-existent file is + encountered. + + 8. HADOOP-1473 + CLI/API - Job, TIP, and Task id formats have changed and are now unique + across job tracker restarts. + + 9. HADOOP-1400 + API - JobClient constructor now takes a JobConf object instead of a + Configuration object. + + NEW FEATURES and BUG FIXES + + 1. HADOOP-1197. In Configuration, deprecate getObject() and add + getRaw(), which skips variable expansion. (omalley via cutting) + + 2. HADOOP-1343. In Configuration, deprecate set(String,Object) and + implement Iterable. (omalley via cutting) + + 3. HADOOP-1344. Add RunningJob#getJobName(). (Michael Bieniosek via cutting) + + 4. HADOOP-1342. In aggregators, permit one to limit the number of + unique values per key. (Runping Qi via cutting) + + 5. HADOOP-1340. 
Set the replication factor of the MD5 file in the filecache + to be the same as the replication factor of the original file. + (Dhruba Borthakur via tomwhite.) + + 6. HADOOP-1355. Fix null pointer dereference in + TaskLogAppender.append(LoggingEvent). (Arun C Murthy via tomwhite.) + + 7. HADOOP-1357. Fix CopyFiles to correctly avoid removing "/". + (Arun C Murthy via cutting) + + 8. HADOOP-234. Add pipes facility, which permits writing MapReduce + programs in C++. + + 9. HADOOP-1359. Fix a potential NullPointerException in HDFS. + (Hairong Kuang via cutting) + + 10. HADOOP-1364. Fix inconsistent synchronization in SequenceFile. + (omalley via cutting) + + 11. HADOOP-1379. Add findbugs target to build.xml. + (Nigel Daley via cutting) + + 12. HADOOP-1364. Fix various inconsistent synchronization issues. + (Devaraj Das via cutting) + + 13. HADOOP-1393. Remove a potential unexpected negative number from + uses of random number generator. (omalley via cutting) + + 14. HADOOP-1387. A number of "performance" code-cleanups suggested + by findbugs. (Arun C Murthy via cutting) + + 15. HADOOP-1401. Add contrib/hbase javadoc to tree. (stack via cutting) + + 16. HADOOP-894. Change HDFS so that the client only retrieves a limited + number of block locations per request from the namenode. + (Konstantin Shvachko via cutting) + + 17. HADOOP-1406. Plug a leak in MapReduce's use of metrics. + (David Bowen via cutting) + + 18. HADOOP-1394. Implement "performance" code-cleanups in HDFS + suggested by findbugs. (Raghu Angadi via cutting) + + 19. HADOOP-1413. Add example program that uses Knuth's dancing links + algorithm to solve pentomino problems. (omalley via cutting) + + 20. HADOOP-1226. Change HDFS so that paths it returns are always + fully qualified. (Dhruba Borthakur via cutting) + + 21. HADOOP-800. Improvements to HDFS web-based file browser. + (Enis Soztutar via cutting) + + 22. HADOOP-1408. Fix a compiler warning by adding a class to replace + a generic. (omalley via cutting) + + 23. HADOOP-1376. Modify RandomWriter example so that it can generate + data for the Terasort benchmark. (Devaraj Das via cutting) + + 24. HADOOP-1429. Stop logging exceptions during normal IPC server + shutdown. (stack via cutting) + + 25. HADOOP-1461. Fix the synchronization of the task tracker to + avoid lockups in job cleanup. (Arun C Murthy via omalley) + + 26. HADOOP-1446. Update the TaskTracker metrics while the task is + running. (Devaraj via omalley) + + 27. HADOOP-1414. Fix a number of issues identified by FindBugs as + "Bad Practice". (Dhruba Borthakur via cutting) + + 28. HADOOP-1392. Fix "correctness" bugs identified by FindBugs in + fs and dfs packages. (Raghu Angadi via cutting) + + 29. HADOOP-1412. Fix "dodgy" bugs identified by FindBugs in fs and + io packages. (Hairong Kuang via cutting) + + 30. HADOOP-1261. Remove redundant events from HDFS namenode's edit + log when a datanode restarts. (Raghu Angadi via cutting) + + 31. HADOOP-1336. Re-enable speculative execution by + default. (omalley via cutting) + + 32. HADOOP-1311. Fix a bug in BytesWritable#set() where start offset + was ignored. (Dhruba Borthakur via cutting) + + 33. HADOOP-1450. Move checksumming closer to user code, so that + checksums are created before data is stored in large buffers and + verified after data is read from large buffers, to better catch + memory errors. (cutting) + + 34. HADOOP-1447. Add support in contrib/data_join for text inputs. + (Senthil Subramanian via cutting) + + 35. HADOOP-1456. 
Fix TestDecommission assertion failure by setting + the namenode to ignore the load on datanodes while allocating + replicas. (Dhruba Borthakur via tomwhite) + + 36. HADOOP-1396. Fix FileNotFoundException on DFS block. + (Dhruba Borthakur via tomwhite) + + 37. HADOOP-1467. Remove redundant counters from WordCount example. + (Owen O'Malley via tomwhite) + + 38. HADOOP-1139. Log HDFS block transitions at INFO level, to better + enable diagnosis of problems. (Dhruba Borthakur via cutting) + + 39. HADOOP-1269. Finer grained locking in HDFS namenode. + (Dhruba Borthakur via cutting) + + 40. HADOOP-1438. Improve HDFS documentation, correcting typos and + making images appear in PDF. Also update copyright date for all + docs. (Luke Nezda via cutting) + + 41. HADOOP-1457. Add counters for monitoring task assignments. + (Arun C Murthy via tomwhite) + + 42. HADOOP-1472. Fix so that timed-out tasks are counted as failures + rather than as killed. (Arun C Murthy via cutting) + + 43. HADOOP-1234. Fix a race condition in file cache that caused + tasktracker to not be able to find cached files. + (Arun C Murthy via cutting) + + 44. HADOOP-1482. Fix secondary namenode to roll info port. + (Dhruba Borthakur via cutting) + + 45. HADOOP-1300. Improve removal of excess block replicas to be + rack-aware. Attempts are now made to keep replicas on more + racks. (Hairong Kuang via cutting) + + 46. HADOOP-1417. Disable a few FindBugs checks that generate a lot + of spurious warnings. (Nigel Daley via cutting) + + 47. HADOOP-1320. Rewrite RandomWriter example to bypass reduce. + (Arun C Murthy via cutting) + + 48. HADOOP-1449. Add some examples to contrib/data_join. + (Senthil Subramanian via cutting) + + 49. HADOOP-1459. Fix so that, in HDFS, getFileCacheHints() returns + hostnames instead of IP addresses. (Dhruba Borthakur via cutting) + + 50. HADOOP-1493. Permit specification of "java.library.path" system + property in "mapred.child.java.opts" configuration property. + (Enis Soztutar via cutting) + + 51. HADOOP-1372. Use LocalDirAllocator for HDFS temporary block + files, so that disk space, writability, etc. is considered. + (Dhruba Borthakur via cutting) + + 52. HADOOP-1193. Pool allocation of compression codecs. This + eliminates a memory leak that could cause OutOfMemoryException, + and also substantially improves performance. + (Arun C Murthy via cutting) + + 53. HADOOP-1492. Fix a NullPointerException handling version + mismatch during datanode registration. + (Konstantin Shvachko via cutting) + + 54. HADOOP-1442. Fix handling of zero-length input splits. + (Senthil Subramanian via cutting) + + 55. HADOOP-1444. Fix HDFS block id generation to check pending + blocks for duplicates. (Dhruba Borthakur via cutting) + + 56. HADOOP-1207. Fix FsShell's 'rm' command to not stop when one of + the named files does not exist. (Tsz Wo Sze via cutting) + + 57. HADOOP-1475. Clear tasktracker's file cache before it + re-initializes, to avoid confusion. (omalley via cutting) + + 58. HADOOP-1505. Remove spurious stacktrace in ZlibFactory + introduced in HADOOP-1093. (Michael Stack via tomwhite) + + 59. HADOOP-1484. Permit one to kill jobs from the web ui. Note that + this is disabled by default. One must set + "webinterface.private.actions" to enable this. + (Enis Soztutar via cutting) + + 60. HADOOP-1003. Remove flushing of namenode edit log from primary + namenode lock, increasing namenode throughput. + (Dhruba Borthakur via cutting) + + 61. HADOOP-1023. Add links to searchable mail archives. 
+ (tomwhite via cutting) + + 62. HADOOP-1504. Fix terminate-hadoop-cluster script in contrib/ec2 + to only terminate Hadoop instances, and not other instances + started by the same user. (tomwhite via cutting) + + 63. HADOOP-1462. Improve task progress reporting. Progress reports + are no longer blocking since i/o is performed in a separate + thread. Reporting during sorting and more is also more + consistent. (Vivek Ratan via cutting) + + 64. [ intentionally blank ] + + 65. HADOOP-1453. Remove some unneeded calls to FileSystem#exists() + when opening files, reducing the namenode load somewhat. + (Raghu Angadi via cutting) + + 66. HADOOP-1489. Fix text input truncation bug due to mark/reset. + Add a unittest. (Bwolen Yang via cutting) + + 67. HADOOP-1455. Permit specification of arbitrary job options on + pipes command line. (Devaraj Das via cutting) + + 68. HADOOP-1501. Better randomize sending of block reports to + namenode, so reduce load spikes. (Dhruba Borthakur via cutting) + + 69. HADOOP-1147. Remove @author tags from Java source files. + + 70. HADOOP-1283. Convert most uses of UTF8 in the namenode to be + String. (Konstantin Shvachko via cutting) + + 71. HADOOP-1511. Speedup hbase unit tests. (stack via cutting) + + 72. HADOOP-1517. Remove some synchronization in namenode to permit + finer grained locking previously added. (Konstantin Shvachko via cutting) + + 73. HADOOP-1512. Fix failing TestTextInputFormat on Windows. + (Senthil Subramanian via nigel) + + 74. HADOOP-1518. Add a session id to job metrics, for use by HOD. + (David Bowen via cutting) + + 75. HADOOP-1292. Change 'bin/hadoop fs -get' to first copy files to + a temporary name, then rename them to their final name, so that + failures don't leave partial files. (Tsz Wo Sze via cutting) + + 76. HADOOP-1377. Add support for modification time to FileSystem and + implement in HDFS and local implementations. Also, alter access + to file properties to be through a new FileStatus interface. + (Dhruba Borthakur via cutting) + + 77. HADOOP-1515. Add MultiFileInputFormat, which can pack multiple, + typically small, input files into each split. (Enis Soztutar via cutting) + + 78. HADOOP-1514. Make reducers report progress while waiting for map + outputs, so they're not killed. (Vivek Ratan via cutting) + + 79. HADOOP-1508. Add an Ant task for FsShell operations. Also add + new FsShell commands "touchz", "test" and "stat". + (Chris Douglas via cutting) + + 80. HADOOP-1028. Add log messages for server startup and shutdown. + (Tsz Wo Sze via cutting) + + 81. HADOOP-1485. Add metrics for monitoring shuffle. + (Devaraj Das via cutting) + + 82. HADOOP-1536. Remove file locks from libhdfs tests. + (Dhruba Borthakur via nigel) + + 83. HADOOP-1520. Add appropriate synchronization to FSEditsLog. + (Dhruba Borthakur via nigel) + + 84. HADOOP-1513. Fix a race condition in directory creation. + (Devaraj via omalley) + + 85. HADOOP-1546. Remove spurious column from HDFS web UI. + (Dhruba Borthakur via cutting) + + 86. HADOOP-1556. Make LocalJobRunner delete working files at end of + job run. (Devaraj Das via tomwhite) + + 87. HADOOP-1571. Add contrib lib directories to root build.xml + javadoc classpath. (Michael Stack via tomwhite) + + 88. HADOOP-1554. Log killed tasks to the job history and display them on the + web/ui. (Devaraj Das via omalley) + + 89. HADOOP-1533. Add persistent error logging for distcp. The logs are stored + into a specified hdfs directory. (Senthil Subramanian via omalley) + + 90. HADOOP-1286. 
Add support to HDFS for distributed upgrades, which + permits coordinated upgrade of datanode data. + (Konstantin Shvachko via cutting) + + 91. HADOOP-1580. Improve contrib/streaming so that subprocess exit + status is displayed for errors. (John Heidemann via cutting) + + 92. HADOOP-1448. In HDFS, randomize lists of non-local block + locations returned to client, so that load is better balanced. + (Hairong Kuang via cutting) + + 93. HADOOP-1578. Fix datanode to send its storage id to namenode + during registration. (Konstantin Shvachko via cutting) + + 94. HADOOP-1584. Fix a bug in GenericWritable which limited it to + 128 types instead of 256. (Espen Amble Kolstad via cutting) + + 95. HADOOP-1473. Make job ids unique across jobtracker restarts. + (omalley via cutting) + + 96. HADOOP-1582. Fix hdfslib to return 0 instead of -1 at + end-of-file, per C conventions. (Christian Kunz via cutting) + + 97. HADOOP-911. Fix a multithreading bug in libhdfs. + (Christian Kunz) + + 98. HADOOP-1486. Fix so that fatal exceptions in namenode cause it + to exit. (Dhruba Borthakur via cutting) + + 99. HADOOP-1470. Factor checksum generation and validation out of + ChecksumFileSystem so that it can be reused by FileSystem's with + built-in checksumming. (Hairong Kuang via cutting) + +100. HADOOP-1590. Use relative urls in jobtracker jsp pages, so that + webapp can be used in non-root contexts. (Thomas Friol via cutting) + +101. HADOOP-1596. Fix the parsing of taskids by streaming and improve the + error reporting. (omalley) + +102. HADOOP-1535. Fix the user-controlled grouping to the reduce function. + (Vivek Ratan via omalley) + +103. HADOOP-1585. Modify GenericWritable to declare the classes as subtypes + of Writable (Espen Amble Kolstad via omalley) + +104. HADOOP-1576. Fix errors in count of completed tasks when + speculative execution is enabled. (Arun C Murthy via cutting) + +105. HADOOP-1598. Fix license headers: adding missing; updating old. + (Enis Soztutar via cutting) + +106. HADOOP-1547. Provide examples for aggregate library. + (Runping Qi via tomwhite) + +107. HADOOP-1570. Permit jobs to enable and disable the use of + hadoop's native library. (Arun C Murthy via cutting) + +108. HADOOP-1433. Add job priority. (Johan Oskarsson via tomwhite) + +109. HADOOP-1597. Add status reports and post-upgrade options to HDFS + distributed upgrade. (Konstantin Shvachko via cutting) + +110. HADOOP-1524. Permit user task logs to appear as they're + created. (Michael Bieniosek via cutting) + +111. HADOOP-1599. Fix distcp bug on Windows. (Senthil Subramanian via cutting) + +112. HADOOP-1562. Add JVM metrics, including GC and logging stats. + (David Bowen via cutting) + +113. HADOOP-1613. Fix "DFS Health" page to display correct time of + last contact. (Dhruba Borthakur via cutting) + +114. HADOOP-1134. Add optimized checksum support to HDFS. Checksums + are now stored with each block, rather than as parallel files. + This reduces the namenode's memory requirements and increases + data integrity. (Raghu Angadi via cutting) + +115. HADOOP-1400. Make JobClient retry requests, so that clients can + survive jobtracker problems. (omalley via cutting) + +116. HADOOP-1564. Add unit tests for HDFS block-level checksums. + (Dhruba Borthakur via cutting) + +117. HADOOP-1620. Reduce the number of abstract FileSystem methods, + simplifying implementations. (cutting) + +118. HADOOP-1625. Fix a "could not move files" exception in datanode. + (Raghu Angadi via cutting) + +119. HADOOP-1624. Fix an infinite loop in datanode. 
(Raghu Angadi via cutting) + +120. HADOOP-1084. Switch mapred file cache to use file modification + time instead of checksum to detect file changes, as checksums are + no longer easily accessed. (Arun C Murthy via cutting) + +130. HADOOP-1623. Fix an infinite loop when copying directories. + (Dhruba Borthakur via cutting) + +131. HADOOP-1603. Fix a bug in namenode initialization where + default replication is sometimes reset to one on restart. + (Raghu Angadi via cutting) + +132. HADOOP-1635. Remove hardcoded keypair name and fix launch-hadoop-cluster + to support later versions of ec2-api-tools. (Stu Hood via tomwhite) + +133. HADOOP-1638. Fix contrib EC2 scripts to support NAT addressing. + (Stu Hood via tomwhite) + +134. HADOOP-1632. Fix an IllegalArgumentException in fsck. + (Hairong Kuang via cutting) + +135. HADOOP-1619. Fix FSInputChecker to not attempt to read past EOF. + (Hairong Kuang via cutting) + +136. HADOOP-1640. Fix TestDecommission on Windows. + (Dhruba Borthakur via cutting) + +137. HADOOP-1587. Fix TestSymLink to get required system properties. + (Devaraj Das via omalley) + +138. HADOOP-1628. Add block CRC protocol unit tests. (Raghu Angadi via omalley) + +139. HADOOP-1653. FSDirectory code-cleanups. FSDirectory.INode + becomes a static class. (Christophe Taton via dhruba) + +140. HADOOP-1066. Restructure documentation to make more user + friendly. (Connie Kleinjans and Jeff Hammerbacher via cutting) + +141. HADOOP-1551. libhdfs supports setting replication factor and + retrieving modification time of files. (Sameer Paranjpye via dhruba) + +141. HADOOP-1647. FileSystem.getFileStatus returns valid values for "/". + (Dhruba Borthakur via dhruba) + +142. HADOOP-1657. Fix NNBench to ensure that the block size is a + multiple of bytes.per.checksum. (Raghu Angadi via dhruba) + +143. HADOOP-1553. Replace user task output and log capture code to use shell + redirection instead of copier threads in the TaskTracker. Capping the + size of the output is now done via tail in memory and thus should not be + large. The output of the tasklog servlet is not forced into UTF8 and is + not buffered entirely in memory. (omalley) + Configuration changes to hadoop-default.xml: + remove mapred.userlog.num.splits + remove mapred.userlog.purge.splits + change default mapred.userlog.limit.kb to 0 (no limit) + change default mapred.userlog.retain.hours to 24 + Configuration changes to log4j.properties: + remove log4j.appender.TLA.noKeepSplits + remove log4j.appender.TLA.purgeLogSplits + remove log4j.appender.TLA.logsRetainHours + URL changes: + http:///tasklog.jsp -> http://tasklog with + parameters limited to start and end, which may be positive (from + start) or negative (from end). + Environment: + require bash (v2 or later) and tail + +144. HADOOP-1659. Fix a job id/job name mixup. (Arun C. Murthy via omalley) + +145. HADOOP-1665. With HDFS Trash enabled and the same file was created + and deleted more than once, the suceeding deletions creates Trash item + names suffixed with a integer. (Dhruba Borthakur via dhruba) + +146. HADOOP-1666. FsShell object can be used for multiple fs commands. + (Dhruba Borthakur via dhruba) + +147. HADOOP-1654. Remove performance regression introduced by Block CRC. + (Raghu Angadi via dhruba) + +148. HADOOP-1680. Improvements to Block CRC upgrade messages. + (Raghu Angadi via dhruba) + +149. HADOOP-71. Allow Text and SequenceFile Map/Reduce inputs from non-default + filesystems. (omalley) + +150. HADOOP-1568. 
Expose HDFS as xml/http filesystem to provide cross-version + compatability. (Chris Douglas via omalley) + +151. HADOOP-1668. Added an INCOMPATIBILITY section to CHANGES.txt. (nigel) + +152. HADOOP-1629. Added a upgrade test for HADOOP-1134. + (Raghu Angadi via nigel) + +153. HADOOP-1698. Fix performance problems on map output sorting for jobs + with large numbers of reduces. (Devaraj Das via omalley) + +154. HADOOP-1716. Fix a Pipes wordcount example to remove the 'file:' + schema from its output path. (omalley via cutting) + +155. HADOOP-1714. Fix TestDFSUpgradeFromImage to work on Windows. + (Raghu Angadi via nigel) + +156. HADOOP-1663. Return a non-zero exit code if streaming fails. (Lohit Renu + via omalley) + +157. HADOOP-1712. Fix an unhandled exception on datanode during block + CRC upgrade. (Raghu Angadi via cutting) + +158. HADOOP-1717. Fix TestDFSUpgradeFromImage to work on Solaris. + (nigel via cutting) + +159. HADOOP-1437. Add Eclipse plugin in contrib. + (Eugene Hung and Christophe Taton via cutting) + + +Release 0.13.0 - 2007-06-08 + + 1. HADOOP-1047. Fix TestReplication to succeed more reliably. + (Hairong Kuang via cutting) + + 2. HADOOP-1063. Fix a race condition in MiniDFSCluster test code. + (Hairong Kuang via cutting) + + 3. HADOOP-1101. In web ui, split shuffle statistics from reduce + statistics, and add some task averages. (Devaraj Das via cutting) + + 4. HADOOP-1071. Improve handling of protocol version mismatch in + JobTracker. (Tahir Hashmi via cutting) + + 5. HADOOP-1116. Increase heap size used for contrib unit tests. + (Philippe Gassmann via cutting) + + 6. HADOOP-1120. Add contrib/data_join, tools to simplify joining + data from multiple sources using MapReduce. (Runping Qi via cutting) + + 7. HADOOP-1064. Reduce log level of some DFSClient messages. + (Dhruba Borthakur via cutting) + + 8. HADOOP-1137. Fix StatusHttpServer to work correctly when + resources are in a jar file. (Benjamin Reed via cutting) + + 9. HADOOP-1094. Optimize generated Writable implementations for + records to not allocate a new BinaryOutputArchive or + BinaryInputArchive per call. (Milind Bhandarkar via cutting) + +10. HADOOP-1068. Improve error message for clusters with 0 datanodes. + (Dhruba Borthakur via tomwhite) + +11. HADOOP-1122. Fix divide-by-zero exception in FSNamesystem + chooseTarget method. (Dhruba Borthakur via tomwhite) + +12. HADOOP-1131. Add a closeAll() static method to FileSystem. + (Philippe Gassmann via tomwhite) + +13. HADOOP-1085. Improve port selection in HDFS and MapReduce test + code. Ports are now selected by the OS during testing rather than + by probing for free ports, improving test reliability. + (Arun C Murthy via cutting) + +14. HADOOP-1153. Fix HDFS daemons to correctly stop their threads. + (Konstantin Shvachko via cutting) + +15. HADOOP-1146. Add a counter for reduce input keys and rename the + "reduce input records" counter to be "reduce input groups". + (David Bowen via cutting) + +16. HADOOP-1165. In records, replace idential generated toString + methods with a method on the base class. (Milind Bhandarkar via cutting) + +17. HADOOP-1164. Fix TestReplicationPolicy to specify port zero, so + that a free port is automatically selected. (omalley via cutting) + +18. HADOOP-1166. Add a NullOutputFormat and use it in the + RandomWriter example. (omalley via cutting) + +19. HADOOP-1169. Fix a cut/paste error in CopyFiles utility so that + S3-based source files are correctly copied. (Michael Stack via cutting) + +20. HADOOP-1167. 
Remove extra synchronization in InMemoryFileSystem. + (omalley via cutting) + +21. HADOOP-1110. Fix an off-by-one error counting map inputs. + (David Bowen via cutting) + +22. HADOOP-1178. Fix a NullPointerException during namenode startup. + (Dhruba Borthakur via cutting) + +23. HADOOP-1011. Fix a ConcurrentModificationException when viewing + job history. (Tahir Hashmi via cutting) + +24. HADOOP-672. Improve help for fs shell commands. + (Dhruba Borthakur via cutting) + +25. HADOOP-1170. Improve datanode performance by removing device + checks from common operations. (Igor Bolotin via cutting) + +26. HADOOP-1090. Fix SortValidator's detection of whether the input + file belongs to the sort-input or sort-output directory. + (Arun C Murthy via tomwhite) + +27. HADOOP-1081. Fix bin/hadoop on Darwin. (Michael Bieniosek via cutting) + +28. HADOOP-1045. Add contrib/hbase, a BigTable-like online database. + (Jim Kellerman via cutting) + +29. HADOOP-1156. Fix a NullPointerException in MiniDFSCluster. + (Hairong Kuang via cutting) + +30. HADOOP-702. Add tools to help automate HDFS upgrades. + (Konstantin Shvachko via cutting) + +31. HADOOP-1163. Fix ganglia metrics to aggregate metrics from different + hosts properly. (Michael Bieniosek via tomwhite) + +32. HADOOP-1194. Make compression style record level for map output + compression. (Arun C Murthy via tomwhite) + +33. HADOOP-1187. Improve DFS Scalability: avoid scanning entire list of + datanodes in getAdditionalBlocks. (Dhruba Borthakur via tomwhite) + +34. HADOOP-1133. Add tool to analyze and debug namenode on a production + cluster. (Dhruba Borthakur via tomwhite) + +35. HADOOP-1151. Remove spurious printing to stderr in streaming + PipeMapRed. (Koji Noguchi via tomwhite) + +36. HADOOP-988. Change namenode to use a single map of blocks to metadata. + (Raghu Angadi via tomwhite) + +37. HADOOP-1203. Change UpgradeUtilities used by DFS tests to use + MiniDFSCluster to start and stop NameNode/DataNodes. + (Nigel Daley via tomwhite) + +38. HADOOP-1217. Add test.timeout property to build.xml, so that + long-running unit tests may be automatically terminated. + (Nigel Daley via cutting) + +39. HADOOP-1149. Improve DFS Scalability: make + processOverReplicatedBlock() a no-op if blocks are not + over-replicated. (Raghu Angadi via tomwhite) + +40. HADOOP-1149. Improve DFS Scalability: optimize getDistance(), + contains(), and isOnSameRack() in NetworkTopology. + (Hairong Kuang via tomwhite) + +41. HADOOP-1218. Make synchronization on TaskTracker's RunningJob + object consistent. (Devaraj Das via tomwhite) + +42. HADOOP-1219. Ignore progress report once a task has reported as + 'done'. (Devaraj Das via tomwhite) + +43. HADOOP-1114. Permit user to specify additional CLASSPATH elements + with a HADOOP_CLASSPATH environment variable. (cutting) + +44. HADOOP-1198. Remove ipc.client.timeout parameter override from + unit test configuration. Using the default is more robust and + has almost the same run time. (Arun C Murthy via tomwhite) + +45. HADOOP-1211. Remove deprecated constructor and unused static + members in DataNode class. (Konstantin Shvachko via tomwhite) + +46. HADOOP-1136. Fix ArrayIndexOutOfBoundsException in + FSNamesystem$UnderReplicatedBlocks add() method. + (Hairong Kuang via tomwhite) + +47. HADOOP-978. Add the client name and the address of the node that + previously started to create the file to the description of + AlreadyBeingCreatedException. (Konstantin Shvachko via tomwhite) + +48. HADOOP-1001. 
Check the type of keys and values generated by the + mapper against the types specified in JobConf. + (Tahir Hashmi via tomwhite) + +49. HADOOP-971. Improve DFS Scalability: Improve name node performance + by adding a hostname to datanodes map. (Hairong Kuang via tomwhite) + +50. HADOOP-1189. Fix 'No space left on device' exceptions on datanodes. + (Raghu Angadi via tomwhite) + +51. HADOOP-819. Change LineRecordWriter to not insert a tab between + key and value when either is null, and to print nothing when both + are null. (Runping Qi via cutting) + +52. HADOOP-1204. Rename InputFormatBase to be FileInputFormat, and + deprecate InputFormatBase. Also make LineRecordReader easier to + extend. (Runping Qi via cutting) + +53. HADOOP-1213. Improve logging of errors by IPC server, to + consistently include the service name and the call. (cutting) + +54. HADOOP-1238. Fix metrics reporting by TaskTracker to correctly + track maps_running and reduces_running. + (Michael Bieniosek via cutting) + +55. HADOOP-1093. Fix a race condition in HDFS where blocks were + sometimes erased before they were reported written. + (Dhruba Borthakur via cutting) + +56. HADOOP-1239. Add a package name to some testjar test classes. + (Jim Kellerman via cutting) + +57. HADOOP-1241. Fix NullPointerException in processReport when + namenode is restarted. (Dhruba Borthakur via tomwhite) + +58. HADOOP-1244. Fix stop-dfs.sh to no longer incorrectly specify + slaves file for stopping datanode. + (Michael Bieniosek via tomwhite) + +59. HADOOP-1253. Fix ConcurrentModificationException and + NullPointerException in JobControl. + (Johan Oskarson via tomwhite) + +60. HADOOP-1256. Fix NameNode so that multiple DataNodeDescriptors + can no longer be created on startup. (Hairong Kuang via cutting) + +61. HADOOP-1214. Replace streaming classes with new counterparts + from Hadoop core. (Runping Qi via tomwhite) + +62. HADOOP-1250. Move a chmod utility from streaming to FileUtil. + (omalley via cutting) + +63. HADOOP-1258. Fix TestCheckpoint test case to wait for + MiniDFSCluster to be active. (Nigel Daley via tomwhite) + +64. HADOOP-1148. Re-indent all Java source code to consistently use + two spaces per indent level. (cutting) + +65. HADOOP-1251. Add a method to Reporter to get the map InputSplit. + (omalley via cutting) + +66. HADOOP-1224. Fix "Browse the filesystem" link to no longer point + to dead datanodes. (Enis Soztutar via tomwhite) + +67. HADOOP-1154. Fail a streaming task if the threads reading from or + writing to the streaming process fail. (Koji Noguchi via tomwhite) + +68. HADOOP-968. Move shuffle and sort to run in reduce's child JVM, + rather than in TaskTracker. (Devaraj Das via cutting) + +69. HADOOP-1111. Add support for client notification of job + completion. If the job configuration has a job.end.notification.url + property it will make a HTTP GET request to the specified URL. + The number of retries and the interval between retries is also + configurable. (Alejandro Abdelnur via tomwhite) + +70. HADOOP-1275. Fix misspelled job notification property in + hadoop-default.xml. (Alejandro Abdelnur via tomwhite) + +71. HADOOP-1152. Fix race condition in MapOutputCopier.copyOutput file + rename causing possible reduce task hang. + (Tahir Hashmi via tomwhite) + +72. HADOOP-1050. Distinguish between failed and killed tasks so as to + not count a lost tasktracker against the job. + (Arun C Murthy via tomwhite) + +73. HADOOP-1271. Fix StreamBaseRecordReader to be able to log record + data that's not UTF-8. 
(Arun C Murthy via tomwhite) + +74. HADOOP-1190. Fix unchecked warnings in main Hadoop code. + (tomwhite) + +75. HADOOP-1127. Fix AlreadyBeingCreatedException in namenode for + jobs run with speculative execution. + (Arun C Murthy via tomwhite) + +76. HADOOP-1282. Omnibus HBase patch. Improved tests & configuration. + (Jim Kellerman via cutting) + +77. HADOOP-1262. Make dfs client try to read from a different replica + of the checksum file when a checksum error is detected. + (Hairong Kuang via tomwhite) + +78. HADOOP-1279. Fix JobTracker to maintain list of recently + completed jobs by order of completion, not submission. + (Arun C Murthy via cutting) + +79. HADOOP-1284. In contrib/streaming, permit flexible specification + of field delimiter and fields for partitioning and sorting. + (Runping Qi via cutting) + +80. HADOOP-1176. Fix a bug where reduce would hang when a map had + more than 2GB of output for it. (Arun C Murthy via cutting) + +81. HADOOP-1293. Fix contrib/streaming to print more than the first + twenty lines of standard error. (Koji Noguchi via cutting) + +82. HADOOP-1297. Fix datanode so that requests to remove blocks that + do not exist no longer causes block reports to be re-sent every + second. (Dhruba Borthakur via cutting) + +83. HADOOP-1216. Change MapReduce so that, when numReduceTasks is + zero, map outputs are written directly as final output, skipping + shuffle, sort and reduce. Use this to implement reduce=NONE + option in contrib/streaming. (Runping Qi via cutting) + +84. HADOOP-1294. Fix unchecked warnings in main Hadoop code under + Java 6. (tomwhite) + +85. HADOOP-1299. Fix so that RPC will restart after RPC.stopClient() + has been called. (Michael Stack via cutting) + +86. HADOOP-1278. Improve blacklisting of TaskTrackers by JobTracker, + to reduce false positives. (Arun C Murthy via cutting) + +87. HADOOP-1290. Move contrib/abacus into mapred/lib/aggregate. + (Runping Qi via cutting) + +88. HADOOP-1272. Extract inner classes from FSNamesystem into separate + classes. (Dhruba Borthakur via tomwhite) + +89. HADOOP-1247. Add support to contrib/streaming for aggregate + package, formerly called Abacus. (Runping Qi via cutting) + +90. HADOOP-1061. Fix bug in listing files in the S3 filesystem. + NOTE: this change is not backwards compatible! You should use the + MigrationTool supplied to migrate existing S3 filesystem data to + the new format. Please backup your data first before upgrading + (using 'hadoop distcp' for example). (tomwhite) + +91. HADOOP-1304. Make configurable the maximum number of task + attempts before a job fails. (Devaraj Das via cutting) + +92. HADOOP-1308. Use generics to restrict types when classes are + passed as parameters to JobConf methods. (Michael Bieniosek via cutting) + +93. HADOOP-1312. Fix a ConcurrentModificationException in NameNode + that killed the heartbeat monitoring thread. + (Dhruba Borthakur via cutting) + +94. HADOOP-1315. Clean up contrib/streaming, switching it to use core + classes more and removing unused code. (Runping Qi via cutting) + +95. HADOOP-485. Allow a different comparator for grouping keys in + calls to reduce. (Tahir Hashmi via cutting) + +96. HADOOP-1322. Fix TaskTracker blacklisting to work correctly in + one- and two-node clusters. (Arun C Murthy via cutting) + +97. HADOOP-1144. Permit one to specify a maximum percentage of tasks + that can fail before a job is aborted. The default is zero. + (Arun C Murthy via cutting) + +98. HADOOP-1184. 
Fix HDFS decomissioning to complete when the only + copy of a block is on a decommissioned node. (Dhruba Borthakur via cutting) + +99. HADOOP-1263. Change DFSClient to retry certain namenode calls + with a random, exponentially increasing backoff time, to avoid + overloading the namenode on, e.g., job start. (Hairong Kuang via cutting) + +100. HADOOP-1325. First complete, functioning version of HBase. + (Jim Kellerman via cutting) + +101. HADOOP-1276. Make tasktracker expiry interval configurable. + (Arun C Murthy via cutting) + +102. HADOOP-1326. Change JobClient#RunJob() to return the job. + (omalley via cutting) + +103. HADOOP-1270. Randomize the fetch of map outputs, speeding the + shuffle. (Arun C Murthy via cutting) + +104. HADOOP-1200. Restore disk checking lost in HADOOP-1170. + (Hairong Kuang via cutting) + +105. HADOOP-1252. Changed MapReduce's allocation of local files to + use round-robin among available devices, rather than a hashcode. + More care is also taken to not allocate files on full or offline + drives. (Devaraj Das via cutting) + +106. HADOOP-1324. Change so that an FSError kills only the task that + generates it rather than the entire task tracker. + (Arun C Murthy via cutting) + +107. HADOOP-1310. Fix unchecked warnings in aggregate code. (tomwhite) + +108. HADOOP-1255. Fix a bug where the namenode falls into an infinite + loop trying to remove a dead node. (Hairong Kuang via cutting) + +109. HADOOP-1160. Fix DistributedFileSystem.close() to close the + underlying FileSystem, correctly aborting files being written. + (Hairong Kuang via cutting) + +110. HADOOP-1341. Fix intermittent failures in HBase unit tests + caused by deadlock. (Jim Kellerman via cutting) + +111. HADOOP-1350. Fix shuffle performance problem caused by forcing + chunked encoding of map outputs. (Devaraj Das via cutting) + +112. HADOOP-1345. Fix HDFS to correctly retry another replica when a + checksum error is encountered. (Hairong Kuang via cutting) + +113. HADOOP-1205. Improve synchronization around HDFS block map. + (Hairong Kuang via cutting) + +114. HADOOP-1353. Fix a potential NullPointerException in namenode. + (Dhruba Borthakur via cutting) + +115. HADOOP-1354. Fix a potential NullPointerException in FsShell. + (Hairong Kuang via cutting) + +116. HADOOP-1358. Fix a potential bug when DFSClient calls skipBytes. + (Hairong Kuang via cutting) + +117. HADOOP-1356. Fix a bug in ValueHistogram. (Runping Qi via cutting) + +118. HADOOP-1363. Fix locking bug in JobClient#waitForCompletion(). + (omalley via cutting) + +119. HADOOP-1368. Fix inconsistent synchronization in JobInProgress. + (omalley via cutting) + +120. HADOOP-1369. Fix inconsistent synchronization in TaskTracker. + (omalley via cutting) + +121. HADOOP-1361. Fix various calls to skipBytes() to check return + value. (Hairong Kuang via cutting) + +122. HADOOP-1388. Fix a potential NullPointerException in web ui. + (Devaraj Das via cutting) + +123. HADOOP-1385. Fix MD5Hash#hashCode() to generally hash to more + than 256 values. (omalley via cutting) + +124. HADOOP-1386. Fix Path to not permit the empty string as a + path, as this has lead to accidental file deletion. Instead + force applications to use "." to name the default directory. + (Hairong Kuang via cutting) + +125. HADOOP-1407. Fix integer division bug in JobInProgress which + meant failed tasks didn't cause the job to fail. + (Arun C Murthy via tomwhite) + +126. HADOOP-1427. Fix a typo that caused GzipCodec to incorrectly use + a very small input buffer. 
(Espen Amble Kolstad via cutting) + +127. HADOOP-1435. Fix globbing code to no longer use the empty string + to indicate the default directory, per HADOOP-1386. + (Hairong Kuang via cutting) + +128. HADOOP-1411. Make task retry framework handle + AlreadyBeingCreatedException when wrapped as a RemoteException. + (Hairong Kuang via tomwhite) + +129. HADOOP-1242. Improve handling of DFS upgrades. + (Konstantin Shvachko via cutting) + +130. HADOOP-1332. Fix so that TaskTracker exits reliably during unit + tests on Windows. (omalley via cutting) + +131. HADOOP-1431. Fix so that sort progress reporting during map runs + only while sorting, so that stuck maps are correctly terminated. + (Devaraj Das and Arun C Murthy via cutting) + +132. HADOOP-1452. Change TaskTracker.MapOutputServlet.doGet.totalRead + to a long, permitting map outputs to exceed 2^31 bytes. + (omalley via cutting) + +133. HADOOP-1443. Fix a bug opening zero-length files in HDFS. + (Konstantin Shvachko via cutting) + + +Release 0.12.3 - 2007-04-06 + + 1. HADOOP-1162. Fix bug in record CSV and XML serialization of + binary values. (Milind Bhandarkar via cutting) + + 2. HADOOP-1123. Fix NullPointerException in LocalFileSystem when + trying to recover from a checksum error. + (Hairong Kuang & Nigel Daley via tomwhite) + + 3. HADOOP-1177. Fix bug where IOException in MapOutputLocation.getFile + was not being logged. (Devaraj Das via tomwhite) + + 4. HADOOP-1175. Fix bugs in JSP for displaying a task's log messages. + (Arun C Murthy via cutting) + + 5. HADOOP-1191. Fix map tasks to wait until sort progress thread has + stopped before reporting the task done. (Devaraj Das via cutting) + + 6. HADOOP-1192. Fix an integer overflow bug in FSShell's 'dus' + command and a performance problem in HDFS's implementation of it. + (Hairong Kuang via cutting) + + 7. HADOOP-1105. Fix reducers to make "progress" while iterating + through values. (Devaraj Das & Owen O'Malley via tomwhite) + + 8. HADOOP-1179. Make Task Tracker close index file as soon as the read + is done when serving get-map-output requests. + (Devaraj Das via tomwhite) + + +Release 0.12.2 - 2007-23-17 + + 1. HADOOP-1135. Fix bug in block report processing which may cause + the namenode to delete blocks. (Dhruba Borthakur via tomwhite) + + 2. HADOOP-1145. Make XML serializer and deserializer classes public + in record package. (Milind Bhandarkar via cutting) + + 3. HADOOP-1140. Fix a deadlock in metrics. (David Bowen via cutting) + + 4. HADOOP-1150. Fix streaming -reducer and -mapper to give them + defaults. (Owen O'Malley via tomwhite) + + +Release 0.12.1 - 2007-03-17 + + 1. HADOOP-1035. Fix a StackOverflowError in FSDataSet. + (Raghu Angadi via cutting) + + 2. HADOOP-1053. Fix VInt representation of negative values. Also + remove references in generated record code to methods outside of + the record package and improve some record documentation. + (Milind Bhandarkar via cutting) + + 3. HADOOP-1067. Compile fails if Checkstyle jar is present in lib + directory. Also remove dependency on a particular Checkstyle + version number. (tomwhite) + + 4. HADOOP-1060. Fix an IndexOutOfBoundsException in the JobTracker + that could cause jobs to hang. (Arun C Murthy via cutting) + + 5. HADOOP-1077. Fix a race condition fetching map outputs that could + hang reduces. (Devaraj Das via cutting) + + 6. HADOOP-1083. Fix so that when a cluster restarts with a missing + datanode, its blocks are replicated. (Hairong Kuang via cutting) + + 7. HADOOP-1082. 
Fix a NullPointerException in ChecksumFileSystem. + (Hairong Kuang via cutting) + + 8. HADOOP-1088. Fix record serialization of negative values. + (Milind Bhandarkar via cutting) + + 9. HADOOP-1080. Fix bug in bin/hadoop on Windows when native + libraries are present. (ab via cutting) + +10. HADOOP-1091. Fix a NullPointerException in MetricsRecord. + (David Bowen via tomwhite) + +11. HADOOP-1092. Fix a NullPointerException in HeartbeatMonitor + thread. (Hairong Kuang via tomwhite) + +12. HADOOP-1112. Fix a race condition in Hadoop metrics. + (David Bowen via tomwhite) + +13. HADOOP-1108. Checksummed file system should retry reading if a + different replica is found when handling ChecksumException. + (Hairong Kuang via tomwhite) + +14. HADOOP-1070. Fix a problem with number of racks and datanodes + temporarily doubling. (Konstantin Shvachko via tomwhite) + +15. HADOOP-1099. Fix NullPointerException in JobInProgress. + (Gautam Kowshik via tomwhite) + +16. HADOOP-1115. Fix bug where FsShell copyToLocal doesn't + copy directories. (Hairong Kuang via tomwhite) + +17. HADOOP-1109. Fix NullPointerException in StreamInputFormat. + (Koji Noguchi via tomwhite) + +18. HADOOP-1117. Fix DFS scalability: when the namenode is + restarted it consumes 80% CPU. (Dhruba Borthakur via + tomwhite) + +19. HADOOP-1089. Make the C++ version of write and read v-int + agree with the Java versions. (Milind Bhandarkar via + tomwhite) + +20. HADOOP-1096. Rename InputArchive and OutputArchive and + make them public. (Milind Bhandarkar via tomwhite) + +21. HADOOP-1128. Fix missing progress information in map tasks. + (Espen Amble Kolstad, Andrzej Bialecki, and Owen O'Malley + via tomwhite) + +22. HADOOP-1129. Fix DFSClient to not hide IOExceptions in + flush method. (Hairong Kuang via tomwhite) + +23. HADOOP-1126. Optimize CPU usage for under replicated blocks + when cluster restarts. (Hairong Kuang via tomwhite) + + +Release 0.12.0 - 2007-03-02 + + 1. HADOOP-975. Separate stdout and stderr from tasks. + (Arun C Murthy via cutting) + + 2. HADOOP-982. Add some setters and a toString() method to + BytesWritable. (omalley via cutting) + + 3. HADOOP-858. Move contrib/smallJobsBenchmark to src/test, removing + obsolete bits. (Nigel Daley via cutting) + + 4. HADOOP-992. Fix MiniMR unit tests to use MiniDFS when specified, + rather than the local FS. (omalley via cutting) + + 5. HADOOP-954. Change use of metrics to use callback mechanism. + Also rename utility class Metrics to MetricsUtil. + (David Bowen & Nigel Daley via cutting) + + 6. HADOOP-893. Improve HDFS client's handling of dead datanodes. + The set is no longer reset with each block, but rather is now + maintained for the life of an open file. (Raghu Angadi via cutting) + + 7. HADOOP-882. Upgrade to jets3t version 0.5, used by the S3 + FileSystem. This version supports retries. (Michael Stack via cutting) + + 8. HADOOP-977. Send task's stdout and stderr to JobClient's stdout + and stderr respectively, with each line tagged by the task's name. + (Arun C Murthy via cutting) + + 9. HADOOP-761. Change unit tests to not use /tmp. (Nigel Daley via cutting) + +10. HADOOP-1007. Make names of metrics used in Hadoop unique. + (Nigel Daley via cutting) + +11. HADOOP-491. Change mapred.task.timeout to be per-job, and make a + value of zero mean no timeout. Also change contrib/streaming to + disable task timeouts. (Arun C Murthy via cutting) + +12. HADOOP-1010. Add Reporter.NULL, a Reporter implementation that + does nothing. (Runping Qi via cutting) + +13. HADOOP-923. 
In HDFS NameNode, move replication computation to a + separate thread, to improve heartbeat processing time. + (Dhruba Borthakur via cutting) + +14. HADOOP-476. Rewrite contrib/streaming command-line processing, + improving parameter validation. (Sanjay Dahiya via cutting) + +15. HADOOP-973. Improve error messages in Namenode. This should help + to track down a problem that was appearing as a + NullPointerException. (Dhruba Borthakur via cutting) + +16. HADOOP-649. Fix so that jobs with no tasks are not lost. + (Thomas Friol via cutting) + +17. HADOOP-803. Reduce memory use by HDFS namenode, phase I. + (Raghu Angadi via cutting) + +18. HADOOP-1021. Fix MRCaching-based unit tests on Windows. + (Nigel Daley via cutting) + +19. HADOOP-889. Remove duplicate code from HDFS unit tests. + (Milind Bhandarkar via cutting) + +20. HADOOP-943. Improve HDFS's fsck command to display the filename + for under-replicated blocks. (Dhruba Borthakur via cutting) + +21. HADOOP-333. Add validator for sort benchmark output. + (Arun C Murthy via cutting) + +22. HADOOP-947. Improve performance of datanode decomissioning. + (Dhruba Borthakur via cutting) + +23. HADOOP-442. Permit one to specify hosts allowed to connect to + namenode and jobtracker with include and exclude files. (Wendy + Chien via cutting) + +24. HADOOP-1017. Cache constructors, for improved performance. + (Ron Bodkin via cutting) + +25. HADOOP-867. Move split creation out of JobTracker to client. + Splits are now saved in a separate file, read by task processes + directly, so that user code is no longer required in the + JobTracker. (omalley via cutting) + +26. HADOOP-1006. Remove obsolete '-local' option from test code. + (Gautam Kowshik via cutting) + +27. HADOOP-952. Create a public (shared) Hadoop EC2 AMI. + The EC2 scripts now support launch of public AMIs. + (tomwhite) + +28. HADOOP-1025. Remove some obsolete code in ipc.Server. (cutting) + +29. HADOOP-997. Implement S3 retry mechanism for failed block + transfers. This includes a generic retry mechanism for use + elsewhere in Hadoop. (tomwhite) + +30. HADOOP-990. Improve HDFS support for full datanode volumes. + (Raghu Angadi via cutting) + +31. HADOOP-564. Replace uses of "dfs://" URIs with the more standard + "hdfs://". (Wendy Chien via cutting) + +32. HADOOP-1030. In unit tests, unify setting of ipc.client.timeout. + Also increase the value used from one to two seconds, in hopes of + making tests complete more reliably. (cutting) + +33. HADOOP-654. Stop assigning tasks to a tasktracker if it has + failed more than a specified number in the job. + (Arun C Murthy via cutting) + +34. HADOOP-985. Change HDFS to identify nodes by IP address rather + than by DNS hostname. (Raghu Angadi via cutting) + +35. HADOOP-248. Optimize location of map outputs to not use random + probes. (Devaraj Das via cutting) + +36. HADOOP-1029. Fix streaming's input format to correctly seek to + the start of splits. (Arun C Murthy via cutting) + +37. HADOOP-492. Add per-job and per-task counters. These are + incremented via the Reporter interface and available through the + web ui and the JobClient API. The mapreduce framework maintains a + few basic counters, and applications may add their own. Counters + are also passed to the metrics system. + (David Bowen via cutting) + +38. HADOOP-1034. Fix datanode to better log exceptions. + (Philippe Gassmann via cutting) + +39. HADOOP-878. In contrib/streaming, fix reducer=NONE to work with + multiple maps. (Arun C Murthy via cutting) + +40. HADOOP-1039. 
In HDFS's TestCheckpoint, avoid restarting + MiniDFSCluster so often, speeding this test. (Dhruba Borthakur via cutting) + +41. HADOOP-1040. Update RandomWriter example to use counters and + user-defined input and output formats. (omalley via cutting) + +42. HADOOP-1027. Fix problems with in-memory merging during shuffle + and re-enable this optimization. (Devaraj Das via cutting) + +43. HADOOP-1036. Fix exception handling in TaskTracker to keep tasks + from being lost. (Arun C Murthy via cutting) + +44. HADOOP-1042. Improve the handling of failed map output fetches. + (Devaraj Das via cutting) + +45. HADOOP-928. Make checksums optional per FileSystem. + (Hairong Kuang via cutting) + +46. HADOOP-1044. Fix HDFS's TestDecommission to not spuriously fail. + (Wendy Chien via cutting) + +47. HADOOP-972. Optimize HDFS's rack-aware block placement algorithm. + (Hairong Kuang via cutting) + +48. HADOOP-1043. Optimize shuffle, increasing parallelism. + (Devaraj Das via cutting) + +49. HADOOP-940. Improve HDFS's replication scheduling. + (Dhruba Borthakur via cutting) + +50. HADOOP-1020. Fix a bug in Path resolution, and a with unit tests + on Windows. (cutting) + +51. HADOOP-941. Enhance record facility. + (Milind Bhandarkar via cutting) + +52. HADOOP-1000. Fix so that log messages in task subprocesses are + not written to a task's standard error. (Arun C Murthy via cutting) + +53. HADOOP-1037. Fix bin/slaves.sh, which currently only works with + /bin/bash, to specify /bin/bash rather than /bin/sh. (cutting) + +54. HADOOP-1046. Clean up tmp from partially received stale block files. (ab) + +55. HADOOP-1041. Optimize mapred counter implementation. Also group + counters by their declaring Enum. (David Bowen via cutting) + +56. HADOOP-1032. Permit one to specify jars that will be cached + across multiple jobs. (Gautam Kowshik via cutting) + +57. HADOOP-1051. Add optional checkstyle task to build.xml. To use + this developers must download the (LGPL'd) checkstyle jar + themselves. (tomwhite via cutting) + +58. HADOOP-1049. Fix a race condition in IPC client. + (Devaraj Das via cutting) + +60. HADOOP-1056. Check HDFS include/exclude node lists with both IP + address and hostname. (Wendy Chien via cutting) + +61. HADOOP-994. In HDFS, limit the number of blocks invalidated at + once. Large lists were causing datenodes to timeout. + (Dhruba Borthakur via cutting) + +62. HADOOP-432. Add a trash feature, disabled by default. When + enabled, the FSShell 'rm' command will move things to a trash + directory in the filesystem. In HDFS, a thread periodically + checkpoints the trash and removes old checkpoints. (cutting) + + +Release 0.11.2 - 2007-02-16 + + 1. HADOOP-1009. Fix an infinite loop in the HDFS namenode. + (Dhruba Borthakur via cutting) + + 2. HADOOP-1014. Disable in-memory merging during shuffle, as this is + causing data corruption. (Devaraj Das via cutting) + + +Release 0.11.1 - 2007-02-09 + + 1. HADOOP-976. Make SequenceFile.Metadata public. (Runping Qi via cutting) + + 2. HADOOP-917. Fix a NullPointerException in SequenceFile's merger + with large map outputs. (omalley via cutting) + + 3. HADOOP-984. Fix a bug in shuffle error handling introduced by + HADOOP-331. If a map output is unavailable, the job tracker is + once more informed. (Arun C Murthy via cutting) + + 4. HADOOP-987. Fix a problem in HDFS where blocks were not removed + from neededReplications after a replication target was selected. + (Hairong Kuang via cutting) + +Release 0.11.0 - 2007-02-02 + + 1. HADOOP-781. 
Remove methods deprecated in 0.10 that are no longer + widely used. (cutting) + + 2. HADOOP-842. Change HDFS protocol so that the open() method is + passed the client hostname, to permit the namenode to order block + locations on the basis of network topology. + (Hairong Kuang via cutting) + + 3. HADOOP-852. Add an ant task to compile record definitions, and + use it to compile record unit tests. (Milind Bhandarkar via cutting) + + 4. HADOOP-757. Fix "Bad File Descriptor" exception in HDFS client + when an output file is closed twice. (Raghu Angadi via cutting) + + 5. [ intentionally blank ] + + 6. HADOOP-890. Replace dashes in metric names with underscores, + for better compatibility with some monitoring systems. + (Nigel Daley via cutting) + + 7. HADOOP-801. Add to jobtracker a log of task completion events. + (Sanjay Dahiya via cutting) + + 8. HADOOP-855. In HDFS, try to repair files with checksum errors. + An exception is still thrown, but corrupt blocks are now removed + when they have replicas. (Wendy Chien via cutting) + + 9. HADOOP-886. Reduce number of timer threads created by metrics API + by pooling contexts. (Nigel Daley via cutting) + +10. HADOOP-897. Add a "javac.args" property to build.xml that permits + one to pass arbitrary options to javac. (Milind Bhandarkar via cutting) + +11. HADOOP-899. Update libhdfs for changes in HADOOP-871. + (Sameer Paranjpye via cutting) + +12. HADOOP-905. Remove some dead code from JobClient. (cutting) + +13. HADOOP-902. Fix a NullPointerException in HDFS client when + closing output streams. (Raghu Angadi via cutting) + +14. HADOOP-735. Switch generated record code to use BytesWritable to + represent fields of type 'buffer'. (Milind Bhandarkar via cutting) + +15. HADOOP-830. Improve mapreduce merge performance by buffering and + merging multiple map outputs as they arrive at reduce nodes before + they're written to disk. (Devaraj Das via cutting) + +16. HADOOP-908. Add a new contrib package, Abacus, that simplifies + counting and aggregation, built on MapReduce. (Runping Qi via cutting) + +17. HADOOP-901. Add support for recursive renaming to the S3 filesystem. + (Tom White via cutting) + +18. HADOOP-912. Fix a bug in TaskTracker.isIdle() that was + sporadically causing unit test failures. (Arun C Murthy via cutting) + +19. HADOOP-909. Fix the 'du' command to correctly compute the size of + FileSystem directory trees. (Hairong Kuang via cutting) + +20. HADOOP-731. When a checksum error is encountered on a file stored + in HDFS, try another replica of the data, if any. + (Wendy Chien via cutting) + +21. HADOOP-732. Add support to SequenceFile for arbitrary metadata, + as a set of attribute value pairs. (Runping Qi via cutting) + +22. HADOOP-929. Fix PhasedFileSystem to pass configuration to + underlying FileSystem. (Sanjay Dahiya via cutting) + +23. HADOOP-935. Fix contrib/abacus to not delete pre-existing output + files, but rather to fail in this case. (Runping Qi via cutting) + +24. HADOOP-936. More metric renamings, as in HADOOP-890. + (Nigel Daley via cutting) + +25. HADOOP-856. Fix HDFS's fsck command to not report that + non-existent filesystems are healthy. (Milind Bhandarkar via cutting) + +26. HADOOP-602. Remove the dependency on Lucene's PriorityQueue + utility, by copying it into Hadoop. This facilitates using Hadoop + with different versions of Lucene without worrying about CLASSPATH + order. (Milind Bhandarkar via cutting) + +27. [ intentionally blank ] + +28. HADOOP-227. 
Add support for backup namenodes, which periodically + get snapshots of the namenode state. (Dhruba Borthakur via cutting) + +29. HADOOP-884. Add scripts in contrib/ec2 to facilitate running + Hadoop on an Amazon EC2 cluster. (Tom White via cutting) + +30. HADOOP-937. Change the namenode to request re-registration of + datanodes in more circumstances. (Hairong Kuang via cutting) + +31. HADOOP-922. Optimize small forward seeks in HDFS. If data is + likely already in flight, skip ahead rather than re-opening the + block. (Dhruba Borthakur via cutting) + +32. HADOOP-961. Add a 'job -events' sub-command that prints job + events, including task completions and failures. (omalley via cutting) + +33. HADOOP-959. Fix namenode snapshot code added in HADOOP-227 to + work on Windows. (Dhruba Borthakur via cutting) + +34. HADOOP-934. Fix TaskTracker to catch metrics exceptions that were + causing heartbeats to fail. (Arun Murthy via cutting) + +35. HADOOP-881. Fix JobTracker web interface to display the correct + number of task failures. (Sanjay Dahiya via cutting) + +36. HADOOP-788. Change contrib/streaming to subclass TextInputFormat, + permitting it to take advantage of native compression facilities. + (Sanjay Dahiya via cutting) + +37. HADOOP-962. In contrib/ec2: make scripts executable in tar file; + add a README; make the environment file use a template. + (Tom White via cutting) + +38. HADOOP-549. Fix a NullPointerException in TaskReport's + serialization. (omalley via cutting) + +39. HADOOP-963. Fix remote exceptions to have the stack trace of the + caller thread, not the IPC listener thread. (omalley via cutting) + +40. HADOOP-967. Change RPC clients to start sending a version header. + (omalley via cutting) + +41. HADOOP-964. Fix a bug introduced by HADOOP-830 where jobs failed + whose comparators and/or i/o types were in the job's jar. + (Dennis Kubes via cutting) + +42. HADOOP-969. Fix a deadlock in JobTracker. (omalley via cutting) + +43. HADOOP-862. Add support for the S3 FileSystem to the CopyFiles + tool. (Michael Stack via cutting) + +44. HADOOP-965. Fix IsolationRunner so that job's jar can be found. + (Dennis Kubes via cutting) + +45. HADOOP-309. Fix two NullPointerExceptions in StatusHttpServer. + (navychen via cutting) + +46. HADOOP-692. Add rack awareness to HDFS's placement of blocks. + (Hairong Kuang via cutting) + + +Release 0.10.1 - 2007-01-10 + + 1. HADOOP-857. Fix S3 FileSystem implementation to permit its use + for MapReduce input and output. (Tom White via cutting) + + 2. HADOOP-863. Reduce logging verbosity introduced by HADOOP-813. + (Devaraj Das via cutting) + + 3. HADOOP-815. Fix memory leaks in JobTracker. (Arun C Murthy via cutting) + + 4. HADOOP-600. Fix a race condition in JobTracker. + (Arun C Murthy via cutting) + + 5. HADOOP-864. Fix 'bin/hadoop -jar' to operate correctly when + hadoop.tmp.dir does not yet exist. (omalley via cutting) + + 6. HADOOP-866. Fix 'dfs -get' command to remove existing crc files, + if any. (Milind Bhandarkar via cutting) + + 7. HADOOP-871. Fix a bug in bin/hadoop setting JAVA_LIBRARY_PATH. + (Arun C Murthy via cutting) + + 8. HADOOP-868. Decrease the number of open files during map, + respecting io.sort.factor. (Devaraj Das via cutting) + + 9. HADOOP-865. Fix S3 FileSystem so that partially created files can + be deleted. (Tom White via cutting) + +10. HADOOP-873. Pass java.library.path correctly to child processes. + (omalley via cutting) + +11. HADOOP-851. Add support for the LZO codec.
This is much faster + than the default, zlib-based compression, but it is only available + when the native library is built. (Arun C Murthy via cutting) + +12. HADOOP-880. Fix S3 FileSystem to remove directories. + (Tom White via cutting) + +13. HADOOP-879. Fix InputFormatBase to handle output generated by + MapFileOutputFormat. (cutting) + +14. HADOOP-659. In HDFS, prioritize replication of blocks based on + current replication level. Blocks which are severely + under-replicated should be further replicated before blocks which + are less under-replicated. (Hairong Kuang via cutting) + +15. HADOOP-726. Deprecate FileSystem locking methods. They are not + currently usable. Locking should eventually provided as an + independent service. (Raghu Angadi via cutting) + +16. HADOOP-758. Fix exception handling during reduce so that root + exceptions are not masked by exceptions in cleanups. + (Raghu Angadi via cutting) + + +Release 0.10.0 - 2007-01-05 + + 1. HADOOP-763. Change DFS namenode benchmark to not use MapReduce. + (Nigel Daley via cutting) + + 2. HADOOP-777. Use fully-qualified hostnames for tasktrackers and + datanodes. (Mahadev Konar via cutting) + + 3. HADOOP-621. Change 'dfs -cat' to exit sooner when output has been + closed. (Dhruba Borthakur via cutting) + + 4. HADOOP-752. Rationalize some synchronization in DFS namenode. + (Dhruba Borthakur via cutting) + + 5. HADOOP-629. Fix RPC services to better check the protocol name and + version. (omalley via cutting) + + 6. HADOOP-774. Limit the number of invalid blocks returned with + heartbeats by the namenode to datanodes. Transmitting and + processing very large invalid block lists can tie up both the + namenode and datanode for too long. (Dhruba Borthakur via cutting) + + 7. HADOOP-738. Change 'dfs -get' command to not create CRC files by + default, adding a -crc option to force their creation. + (Milind Bhandarkar via cutting) + + 8. HADOOP-676. Improved exceptions and error messages for common job + input specification errors. (Sanjay Dahiya via cutting) + + 9. [Included in 0.9.2 release] + +10. HADOOP-756. Add new dfsadmin option to wait for filesystem to be + operational. (Dhruba Borthakur via cutting) + +11. HADOOP-770. Fix jobtracker web interface to display, on restart, + jobs that were running when it was last stopped. + (Sanjay Dahiya via cutting) + +12. HADOOP-331. Write all map outputs to a single file with an index, + rather than to a separate file per reduce task. This should both + speed the shuffle and make things more scalable. + (Devaraj Das via cutting) + +13. HADOOP-818. Fix contrib unit tests to not depend on core unit + tests. (omalley via cutting) + +14. HADOOP-786. Log common exception at debug level. + (Sanjay Dahiya via cutting) + +15. HADOOP-796. Provide more convenient access to failed task + information in the web interface. (Sanjay Dahiya via cutting) + +16. HADOOP-764. Reduce memory allocations in namenode some. + (Dhruba Borthakur via cutting) + +17. HADOOP-802. Update description of mapred.speculative.execution to + mention reduces. (Nigel Daley via cutting) + +18. HADOOP-806. Include link to datanodes on front page of namenode + web interface. (Raghu Angadi via cutting) + +19. HADOOP-618. Make JobSubmissionProtocol public. + (Arun C Murthy via cutting) + +20. HADOOP-782. Fully remove killed tasks. (Arun C Murthy via cutting) + +21. HADOOP-792. Fix 'dfs -mv' to return correct status. + (Dhruba Borthakur via cutting) + +22. HADOOP-673. Give each task its own working directory again. 
+ (Mahadev Konar via cutting) + +23. HADOOP-571. Extend the syntax of Path to be a URI; to be + optionally qualified with a scheme and authority. The scheme + determines the FileSystem implementation, while the authority + determines the FileSystem instance. New FileSystem + implementations may be provided by defining an fs.<scheme>.impl + property, naming the FileSystem implementation class. This + permits easy integration of new FileSystem implementations. + (cutting) + +24. HADOOP-720. Add an HDFS white paper to website. + (Dhruba Borthakur via cutting) + +25. HADOOP-794. Fix a divide-by-zero exception when a job specifies + zero map tasks. (omalley via cutting) + +26. HADOOP-454. Add a 'dfs -dus' command that provides summary disk + usage. (Hairong Kuang via cutting) + +27. HADOOP-574. Add an Amazon S3 implementation of FileSystem. To + use this, one need only specify paths of the form + s3://id:secret@bucket/. Alternately, the AWS access key id and + secret can be specified in your config, with the properties + fs.s3.awsAccessKeyId and fs.s3.awsSecretAccessKey. + (Tom White via cutting) + +28. HADOOP-824. Rename DFSShell to be FsShell, since it applies + generically to all FileSystem implementations. (cutting) + +29. HADOOP-813. Fix map output sorting to report progress, so that + sorts which take longer than the task timeout do not fail. + (Devaraj Das via cutting) + +30. HADOOP-825. Fix HDFS daemons when configured with new URI syntax. + (omalley via cutting) + +31. HADOOP-596. Fix a bug in phase reporting during reduce. + (Sanjay Dahiya via cutting) + +32. HADOOP-811. Add a utility, MultithreadedMapRunner. + (Alejandro Abdelnur via cutting) + +33. HADOOP-829. Within HDFS, clearly separate three different + representations for datanodes: one for RPCs, one for + namenode-internal use, and one for namespace persistence. + (Dhruba Borthakur via cutting) + +34. HADOOP-823. Fix problem starting datanode when not all configured + data directories exist. (Bryan Pendleton via cutting) + +35. HADOOP-451. Add a Split interface. CAUTION: This incompatibly + changes the InputFormat and RecordReader interfaces. Not only is + FileSplit replaced with Split, but a FileSystem parameter is no + longer passed in several methods, input validation has changed, + etc. (omalley via cutting) + +36. HADOOP-814. Optimize locking in namenode. (Dhruba Borthakur via cutting) + +37. HADOOP-738. Change 'fs -put' and 'fs -get' commands to accept + standard input and output, respectively. Standard i/o is + specified by a file named '-'. (Wendy Chien via cutting) + +38. HADOOP-835. Fix a NullPointerException reading record-compressed + SequenceFiles. (Hairong Kuang via cutting) + +39. HADOOP-836. Fix a MapReduce bug on Windows, where the wrong + FileSystem was used. Also add a static FileSystem.getLocal() + method and better Path checking in HDFS, to help avoid such issues + in the future. (omalley via cutting) + +40. HADOOP-837. Improve RunJar utility to unpack the jar file into + hadoop.tmp.dir, rather than the system temporary directory. + (Hairong Kuang via cutting) + +41. HADOOP-841. Fix native library to build 32-bit version even when + on a 64-bit host, if a 32-bit JVM is used. (Arun C Murthy via cutting) + +42. HADOOP-838. Fix tasktracker to pass java.library.path to + sub-processes, so that libhadoop.a is found. + (Arun C Murthy via cutting) + +43. HADOOP-844. Send metrics messages on a fixed-delay schedule + instead of a fixed-rate schedule. (David Bowen via cutting) + +44. HADOOP-849.
Fix OutOfMemory exceptions in TaskTracker due to a + file handle leak in SequenceFile. (Devaraj Das via cutting) + +45. HADOOP-745. Fix a synchronization bug in the HDFS namenode. + (Dhruba Borthakur via cutting) + +46. HADOOP-850. Add Writable implementations for variable-length + integers. (ab via cutting) + +47. HADOOP-525. Add raw comparators to record types. This greatly + improves record sort performance. (Milind Bhandarkar via cutting) + +48. HADOOP-628. Fix a problem with 'fs -cat' command, where some + characters were replaced with question marks. (Wendy Chien via cutting) + +49. HADOOP-804. Reduce verbosity of MapReduce logging. + (Sanjay Dahiya via cutting) + +50. HADOOP-853. Rename 'site' to 'docs', in preparation for inclusion + in releases. (cutting) + +51. HADOOP-371. Include contrib jars and site documentation in + distributions. Also add contrib and example documentation to + distributed javadoc, in separate sections. (Nigel Daley via cutting) + +52. HADOOP-846. Report progress during entire map, as sorting of + intermediate outputs may happen at any time, potentially causing + task timeouts. (Devaraj Das via cutting) + +53. HADOOP-840. In task tracker, queue task cleanups and perform them + in a separate thread. (omalley & Mahadev Konar via cutting) + +54. HADOOP-681. Add to HDFS the ability to decommission nodes. This + causes their blocks to be re-replicated on other nodes, so that + they may be removed from a cluster. (Dhruba Borthakur via cutting) + +55. HADOOP-470. In HDFS web ui, list the datanodes containing each + copy of a block. (Hairong Kuang via cutting) + +56. HADOOP-700. Change bin/hadoop to only include core jar file on + classpath, not example, test, etc. Also rename core jar to + hadoop-${version}-core.jar so that it can be more easily + identified. (Nigel Daley via cutting) + +57. HADOOP-619. Extend InputFormatBase to accept individual files and + glob patterns as MapReduce inputs, not just directories. Also + change contrib/streaming to use this. (Sanjay Dahia via cutting) + + +Release 0.9.2 - 2006-12-15 + + 1. HADOOP-639. Restructure InterTrackerProtocol to make task + accounting more reliable. (Arun C Murthy via cutting) + + 2. HADOOP-827. Turn off speculative execution by default, since it's + currently broken. (omalley via cutting) + + 3. HADOOP-791. Fix a deadlock in the task tracker. + (Mahadev Konar via cutting) + + +Release 0.9.1 - 2006-12-06 + + 1. HADOOP-780. Use ReflectionUtils to instantiate key and value + objects. (ab) + + 2. HADOOP-779. Fix contrib/streaming to work correctly with gzipped + input files. (Hairong Kuang via cutting) + + +Release 0.9.0 - 2006-12-01 + + 1. HADOOP-655. Remove most deprecated code. A few deprecated things + remain, notably UTF8 and some methods that are still required. + Also cleaned up constructors for SequenceFile, MapFile, SetFile, + and ArrayFile a bit. (cutting) + + 2. HADOOP-565. Upgrade to Jetty version 6. (Sanjay Dahiya via cutting) + + 3. HADOOP-682. Fix DFS format command to work correctly when + configured with a non-existent directory. (Sanjay Dahiya via cutting) + + 4. HADOOP-645. Fix a bug in contrib/streaming when -reducer is NONE. + (Dhruba Borthakur via cutting) + + 5. HADOOP-687. Fix a classpath bug in bin/hadoop that blocked the + servers from starting. (Sameer Paranjpye via omalley) + + 6. HADOOP-683. Remove a script dependency on bash, so it works with + dash, the new default for /bin/sh on Ubuntu. (James Todd via cutting) + + 7. HADOOP-382. Extend unit tests to run multiple datanodes. 
+ (Milind Bhandarkar via cutting) + + 8. HADOOP-604. Fix some synchronization issues and a + NullPointerException in DFS datanode. (Raghu Angadi via cutting) + + 9. HADOOP-459. Fix memory leaks and a host of other issues with + libhdfs. (Sameer Paranjpye via cutting) + +10. HADOOP-694. Fix a NullPointerException in jobtracker. + (Mahadev Konar via cutting) + +11. HADOOP-637. Fix a memory leak in the IPC server. Direct buffers + are not collected like normal buffers, and provided little + advantage. (Raghu Angadi via cutting) + +12. HADOOP-696. Fix TestTextInputFormat unit test to not rely on the + order of directory listings. (Sameer Paranjpye via cutting) + +13. HADOOP-611. Add support for iterator-based merging to + SequenceFile. (Devaraj Das via cutting) + +14. HADOOP-688. Move DFS administrative commands to a separate + command named 'dfsadmin'. (Dhruba Borthakur via cutting) + +15. HADOOP-708. Fix test-libhdfs to return the correct status, so + that failures will break the build. (Nigel Daley via cutting) + +16. HADOOP-646. Fix namenode to handle edits files larger than 2GB. + (Milind Bhandarkar via cutting) + +17. HADOOP-705. Fix a bug in the JobTracker when failed jobs were + not completely cleaned up. (Mahadev Konar via cutting) + +18. HADOOP-613. Perform final merge while reducing. This removes one + sort pass over the data and should consequently significantly + decrease overall processing time. (Devaraj Das via cutting) + +19. HADOOP-661. Make each job's configuration visible through the web + ui. (Arun C Murthy via cutting) + +20. HADOOP-489. In MapReduce, separate user logs from system logs. + Each task's log output is now available through the web ui. (Arun + C Murthy via cutting) + +21. HADOOP-712. Fix record io's xml serialization to correctly handle + control-characters. (Milind Bhandarkar via cutting) + +22. HADOOP-668. Improvements to the web-based DFS browser. + (Hairong Kuang via cutting) + +23. HADOOP-715. Fix build.xml so that test logs are written in build + directory, rather than in CWD. (Arun C Murthy via cutting) + +24. HADOOP-538. Add support for building an optional native library, + libhadoop.so, that improves the performance of zlib-based + compression. To build this, specify -Dcompile.native to Ant. + (Arun C Murthy via cutting) + +25. HADOOP-610. Fix an problem when the DFS block size is configured + to be smaller than the buffer size, typically only when debugging. + (Milind Bhandarkar via cutting) + +26. HADOOP-695. Fix a NullPointerException in contrib/streaming. + (Hairong Kuang via cutting) + +27. HADOOP-652. In DFS, when a file is deleted, the block count is + now decremented. (Vladimir Krokhmalyov via cutting) + +28. HADOOP-725. In DFS, optimize block placement algorithm, + previously a performance bottleneck. (Milind Bhandarkar via cutting) + +29. HADOOP-723. In MapReduce, fix a race condition during the + shuffle, which resulted in FileNotFoundExceptions. (omalley via cutting) + +30. HADOOP-447. In DFS, fix getBlockSize(Path) to work with relative + paths. (Raghu Angadi via cutting) + +31. HADOOP-733. Make exit codes in DFShell consistent and add a unit + test. (Dhruba Borthakur via cutting) + +32. HADOOP-709. Fix contrib/streaming to work with commands that + contain control characters. (Dhruba Borthakur via cutting) + +33. HADOOP-677. In IPC, permit a version header to be transmitted + when connections are established. This will permit us to change + the format of IPC requests back-compatibly in subsequent releases. + (omalley via cutting) + +34. 
HADOOP-699. Fix DFS web interface so that filesystem browsing + works correctly, using the right port number. Also add support + for sorting datanode list by various columns. + (Raghu Angadi via cutting) + +35. HADOOP-76. Implement speculative reduce. Now when a job is + configured for speculative execution, both maps and reduces will + execute speculatively. Reduce outputs are written to temporary + location and moved to the final location when reduce is complete. + (Sanjay Dahiya via cutting) + +36. HADOOP-736. Roll back to Jetty 5.1.4, due to performance problems + with Jetty 6.0.1. + +37. HADOOP-739. Fix TestIPC to use different port number, making it + more reliable. (Nigel Daley via cutting) + +38. HADOOP-749. Fix a NullPointerException in jobfailures.jsp. + (omalley via cutting) + +39. HADOOP-747. Fix record serialization to work correctly when + records are embedded in Maps. (Milind Bhandarkar via cutting) + +40. HADOOP-698. Fix HDFS client not to retry the same datanode on + read failures. (Milind Bhandarkar via cutting) + +41. HADOOP-689. Add GenericWritable, to facilitate polymorphism in + MapReduce, SequenceFile, etc. (Feng Jiang via cutting) + +42. HADOOP-430. Stop datanode's HTTP server when registration with + namenode fails. (Wendy Chien via cutting) + +43. HADOOP-750. Fix a potential race condition during mapreduce + shuffle. (omalley via cutting) + +44. HADOOP-728. Fix contrib/streaming-related issues, including + '-reducer NONE'. (Sanjay Dahiya via cutting) + + +Release 0.8.0 - 2006-11-03 + + 1. HADOOP-477. Extend contrib/streaming to scan the PATH environment + variables when resolving executable program names. + (Dhruba Borthakur via cutting) + + 2. HADOOP-583. In DFSClient, reduce the log level of re-connect + attempts from 'info' to 'debug', so they are not normally shown. + (Konstantin Shvachko via cutting) + + 3. HADOOP-498. Re-implement DFS integrity checker to run server-side, + for much improved performance. (Milind Bhandarkar via cutting) + + 4. HADOOP-586. Use the jar name for otherwise un-named jobs. + (Sanjay Dahiya via cutting) + + 5. HADOOP-514. Make DFS heartbeat interval configurable. + (Milind Bhandarkar via cutting) + + 6. HADOOP-588. Fix logging and accounting of failed tasks. + (Sanjay Dahiya via cutting) + + 7. HADOOP-462. Improve command line parsing in DFSShell, so that + incorrect numbers of arguments result in informative errors rather + than ArrayOutOfBoundsException. (Dhruba Borthakur via cutting) + + 8. HADOOP-561. Fix DFS so that one replica of each block is written + locally, if possible. This was the intent, but there as a bug. + (Dhruba Borthakur via cutting) + + 9. HADOOP-610. Fix TaskTracker to survive more exceptions, keeping + tasks from becoming lost. (omalley via cutting) + +10. HADOOP-625. Add a servlet to all http daemons that displays a + stack dump, useful for debugging. (omalley via cutting) + +11. HADOOP-554. Fix DFSShell to return -1 for errors. + (Dhruba Borthakur via cutting) + +12. HADOOP-626. Correct the documentation in the NNBench example + code, and also remove a mistaken call there. + (Nigel Daley via cutting) + +13. HADOOP-634. Add missing license to many files. + (Nigel Daley via cutting) + +14. HADOOP-627. Fix some synchronization problems in MiniMRCluster + that sometimes caused unit tests to fail. (Nigel Daley via cutting) + +15. HADOOP-563. Improve the NameNode's lease policy so that leases + are held for one hour without renewal (instead of one minute). 
+ However another attempt to create the same file will still succeed + if the lease has not been renewed within a minute. This prevents + communication or scheduling problems from causing a write to fail + for up to an hour, barring some other process trying to create the + same file. (Dhruba Borthakur via cutting) + +16. HADOOP-635. In DFSShell, permit specification of multiple files + as the source for file copy and move commands. + (Dhruba Borthakur via cutting) + +17. HADOOP-641. Change NameNode to request a fresh block report from + a re-discovered DataNode, so that no-longer-needed replications + are stopped promptly. (Konstantin Shvachko via cutting) + +18. HADOOP-642. Change IPC client to specify an explicit connect + timeout. (Konstantin Shvachko via cutting) + +19. HADOOP-638. Fix an unsynchronized access to TaskTracker's + internal state. (Nigel Daley via cutting) + +20. HADOOP-624. Fix servlet path to stop a Jetty warning on startup. + (omalley via cutting) + +21. HADOOP-578. Failed tasks are no longer placed at the end of the + task queue. This was originally done to work around other + problems that have now been fixed. Re-executing failed tasks + sooner causes buggy jobs to fail faster. (Sanjay Dahiya via cutting) + +22. HADOOP-658. Update source file headers per Apache policy. (cutting) + +23. HADOOP-636. Add MapFile & ArrayFile constructors which accept a + Progressable, and pass it down to SequenceFile. This permits + reduce tasks which use MapFile to still report progress while + writing blocks to the filesystem. (cutting) + +24. HADOOP-576. Enable contrib/streaming to use the file cache. Also + extend the cache to permit symbolic links to cached items, rather + than local file copies. (Mahadev Konar via cutting) + +25. HADOOP-482. Fix unit tests to work when a cluster is running on + the same machine, removing port conflicts. (Wendy Chien via cutting) + +26. HADOOP-90. Permit dfs.name.dir to list multiple directories, + where namenode data is to be replicated. (Milind Bhandarkar via cutting) + +27. HADOOP-651. Fix DFSCk to correctly pass parameters to the servlet + on the namenode. (Milind Bhandarkar via cutting) + +28. HADOOP-553. Change main() routines of DataNode and NameNode to + log exceptions rather than letting the JVM print them to standard + error. Also, change the hadoop-daemon.sh script to rotate + standard i/o log files. (Raghu Angadi via cutting) + +29. HADOOP-399. Fix javadoc warnings. (Nigel Daley via cutting) + +30. HADOOP-599. Fix web ui and command line to correctly report DFS + filesystem size statistics. Also improve web layout. + (Raghu Angadi via cutting) + +31. HADOOP-660. Permit specification of junit test output format. + (Nigel Daley via cutting) + +32. HADOOP-663. Fix a few unit test issues. (Mahadev Konar via cutting) + +33. HADOOP-664. Cause entire build to fail if libhdfs tests fail. + (Nigel Daley via cutting) + +34. HADOOP-633. Keep jobtracker from dying when job initialization + throws exceptions. Also improve exception handling in a few other + places and add more informative thread names. + (omalley via cutting) + +35. HADOOP-669. Fix a problem introduced by HADOOP-90 that can cause + DFS to lose files. (Milind Bhandarkar via cutting) + +36. HADOOP-373. Consistently check the value returned by + FileSystem.mkdirs(). (Wendy Chien via cutting) + +37. HADOOP-670. Code cleanups in some DFS internals: use generic + types, replace Vector with ArrayList, etc. + (Konstantin Shvachko via cutting) + +38. HADOOP-647. 
Permit map outputs to use a different compression + type than the job output. (omalley via cutting) + +39. HADOOP-671. Fix file cache to check for pre-existence before + creating . (Mahadev Konar via cutting) + +40. HADOOP-665. Extend many DFSShell commands to accept multiple + arguments. Now commands like "ls", "rm", etc. will operate on + multiple files. (Dhruba Borthakur via cutting) + + +Release 0.7.2 - 2006-10-18 + + 1. HADOOP-607. Fix a bug where classes included in job jars were not + found by tasks. (Mahadev Konar via cutting) + + 2. HADOOP-609. Add a unit test that checks that classes in job jars + can be found by tasks. Also modify unit tests to specify multiple + local directories. (Mahadev Konar via cutting) + + +Release 0.7.1 - 2006-10-11 + + 1. HADOOP-593. Fix a NullPointerException in the JobTracker. + (omalley via cutting) + + 2. HADOOP-592. Fix a NullPointerException in the IPC Server. Also + consistently log when stale calls are discarded. (omalley via cutting) + + 3. HADOOP-594. Increase the DFS safe-mode threshold from .95 to + .999, so that nearly all blocks must be reported before filesystem + modifications are permitted. (Konstantin Shvachko via cutting) + + 4. HADOOP-598. Fix tasks to retry when reporting completion, so that + a single RPC timeout won't fail a task. (omalley via cutting) + + 5. HADOOP-597. Fix TaskTracker to not discard map outputs for errors + in transmitting them to reduce nodes. (omalley via cutting) + + +Release 0.7.0 - 2006-10-06 + + 1. HADOOP-243. Fix rounding in the display of task and job progress + so that things are not shown to be 100% complete until they are in + fact finished. (omalley via cutting) + + 2. HADOOP-438. Limit the length of absolute paths in DFS, since the + file format used to store pathnames has some limitations. + (Wendy Chien via cutting) + + 3. HADOOP-530. Improve error messages in SequenceFile when keys or + values are of the wrong type. (Hairong Kuang via cutting) + + 4. HADOOP-288. Add a file caching system and use it in MapReduce to + cache job jar files on slave nodes. (Mahadev Konar via cutting) + + 5. HADOOP-533. Fix unit test to not modify conf directory. + (Hairong Kuang via cutting) + + 6. HADOOP-527. Permit specification of the local address that various + Hadoop daemons should bind to. (Philippe Gassmann via cutting) + + 7. HADOOP-542. Updates to contrib/streaming: reformatted source code, + on-the-fly merge sort, a fix for HADOOP-540, etc. + (Michel Tourn via cutting) + + 8. HADOOP-545. Remove an unused config file parameter. + (Philippe Gassmann via cutting) + + 9. HADOOP-548. Add an Ant property "test.output" to build.xml that + causes test output to be logged to the console. (omalley via cutting) + +10. HADOOP-261. Record an error message when map output is lost. + (omalley via cutting) + +11. HADOOP-293. Report the full list of task error messages in the + web ui, not just the most recent. (omalley via cutting) + +12. HADOOP-551. Restore JobClient's console printouts to only include + a maximum of one update per one percent of progress. + (omalley via cutting) + +13. HADOOP-306. Add a "safe" mode to DFS. The name node enters this + when less than a specified percentage of file data is complete. + Currently safe mode is only used on startup, but eventually it + will also be entered when datanodes disconnect and file data + becomes incomplete. While in safe mode no filesystem + modifications are permitted and block replication is inhibited. + (Konstantin Shvachko via cutting) + +14. HADOOP-431. 
Change 'dfs -rm' to not operate recursively and add a + new command, 'dfs -rmr' which operates recursively. + (Sameer Paranjpye via cutting) + +15. HADOOP-263. Include timestamps for job transitions. The web + interface now displays the start and end times of tasks and the + start times of sorting and reducing for reduce tasks. Also, + extend ObjectWritable to handle enums, so that they can be passed + as RPC parameters. (Sanjay Dahiya via cutting) + +16. HADOOP-556. Contrib/streaming: send keep-alive reports to task + tracker every 10 seconds rather than every 100 records, to avoid + task timeouts. (Michel Tourn via cutting) + +17. HADOOP-547. Fix reduce tasks to ping tasktracker while copying + data, rather than only between copies, avoiding task timeouts. + (Sanjay Dahiya via cutting) + +18. HADOOP-537. Fix src/c++/libhdfs build process to create files in + build/, no longer modifying the source tree. + (Arun C Murthy via cutting) + +19. HADOOP-487. Throw a more informative exception for unknown RPC + hosts. (Sameer Paranjpye via cutting) + +20. HADOOP-559. Add file name globbing (pattern matching) support to + the FileSystem API, and use it in DFSShell ('bin/hadoop dfs') + commands. (Hairong Kuang via cutting) + +21. HADOOP-508. Fix a bug in FSDataInputStream. Incorrect data was + returned after seeking to a random location. + (Milind Bhandarkar via cutting) + +22. HADOOP-560. Add a "killed" task state. This can be used to + distinguish kills from other failures. Task state has also been + converted to use an enum type instead of an int, uncovering a bug + elsewhere. The web interface is also updated to display killed + tasks. (omalley via cutting) + +23. HADOOP-423. Normalize Paths containing directories named "." and + "..", using the standard, unix interpretation. Also add checks in + DFS, prohibiting the use of "." or ".." as directory or file + names. (Wendy Chien via cutting) + +24. HADOOP-513. Replace map output handling with a servlet, rather + than a JSP page. This fixes an issue where + IllegalStateException's were logged, sets content-length + correctly, and better handles some errors. (omalley via cutting) + +25. HADOOP-552. Improved error checking when copying map output files + to reduce nodes. (omalley via cutting) + +26. HADOOP-566. Fix scripts to work correctly when accessed through + relative symbolic links. (Lee Faris via cutting) + +27. HADOOP-519. Add positioned read methods to FSInputStream. These + permit one to read from a stream without moving its position, and + can hence be performed by multiple threads at once on a single + stream. Implement an optimized version for DFS and local FS. + (Milind Bhandarkar via cutting) + +28. HADOOP-522. Permit block compression with MapFile and SetFile. + Since these formats are always sorted, block compression can + provide a big advantage. (cutting) + +29. HADOOP-567. Record version and revision information in builds. A + package manifest is added to the generated jar file containing + version information, and a VersionInfo utility is added that + includes further information, including the build date and user, + and the subversion revision and repository. A 'bin/hadoop + version' comand is added to show this information, and it is also + added to various web interfaces. (omalley via cutting) + +30. HADOOP-568. Fix so that errors while initializing tasks on a + tasktracker correctly report the task as failed to the jobtracker, + so that it will be rescheduled. (omalley via cutting) + +31. HADOOP-550. 
Disable automatic UTF-8 validation in Text. This + permits, e.g., TextInputFormat to again operate on non-UTF-8 data. + (Hairong and Mahadev via cutting) + +32. HADOOP-343. Fix mapred copying so that a failed tasktracker + doesn't cause other copies to slow. (Sameer Paranjpye via cutting) + +33. HADOOP-239. Add a persistent job history mechanism, so that basic + job statistics are not lost after 24 hours and/or when the + jobtracker is restarted. (Sanjay Dahiya via cutting) + +34. HADOOP-506. Ignore heartbeats from stale task trackers. + (Sanjay Dahiya via cutting) + +35. HADOOP-255. Discard stale, queued IPC calls. Do not process + calls whose clients will likely time out before they receive a + response. When the queue is full, new calls are now received and + queued, and the oldest calls are discarded, so that, when servers + get bogged down, they no longer develop a backlog on the socket. + This should improve some DFS namenode failure modes. + (omalley via cutting) + +36. HADOOP-581. Fix datanode to not reset itself on communications + errors with the namenode. If a request to the namenode fails, the + datanode should retry, not restart. This reduces the load on the + namenode, since restarts cause a resend of the block report. + (omalley via cutting) + + +Release 0.6.2 - 2006-09-18 + +1. HADOOP-532. Fix a bug reading value-compressed sequence files, + where an exception was thrown reporting that the full value had not + been read. (omalley via cutting) + +2. HADOOP-534. Change the default value class in JobConf to be Text + instead of the now-deprecated UTF8. This fixes the Grep example + program, which was updated to use Text, but relies on this + default. (Hairong Kuang via cutting) + + +Release 0.6.1 - 2006-09-13 + + 1. HADOOP-520. Fix a bug in libhdfs, where write failures were not + correctly returning error codes. (Arun C Murthy via cutting) + + 2. HADOOP-523. Fix a NullPointerException when TextInputFormat is + explicitly specified. Also add a test case for this. + (omalley via cutting) + + 3. HADOOP-521. Fix another NullPointerException finding the + ClassLoader when using libhdfs. (omalley via cutting) + + 4. HADOOP-526. Fix a NullPointerException when attempting to start + two datanodes in the same directory. (Milind Bhandarkar via cutting) + + 5. HADOOP-529. Fix a NullPointerException when opening + value-compressed sequence files generated by pre-0.6.0 Hadoop. + (omalley via cutting) + + +Release 0.6.0 - 2006-09-08 + + 1. HADOOP-427. Replace some uses of DatanodeDescriptor in the DFS + web UI code with DatanodeInfo, the preferred public class. + (Devaraj Das via cutting) + + 2. HADOOP-426. Fix streaming contrib module to work correctly on + Solaris. This was causing nightly builds to fail. + (Michel Tourn via cutting) + + 3. HADOOP-400. Improvements to task assignment. Tasks are no longer + re-run on nodes where they have failed (unless no other node is + available). Also, tasks are better load-balanced among nodes. + (omalley via cutting) + + 4. HADOOP-324. Fix datanode to not exit when a disk is full, but + rather simply to fail writes. (Wendy Chien via cutting) + + 5. HADOOP-434. Change smallJobsBenchmark to use standard Hadoop + scripts. (Sanjay Dahiya via cutting) + + 6. HADOOP-453. Fix a bug in Text.setCapacity(). (siren via cutting) + + + 7. HADOOP-450. Change so that input types are determined by the + RecordReader rather than specified directly in the JobConf. This + facilitates jobs with a variety of input types. 
+ + WARNING: This contains incompatible API changes! The RecordReader + interface has two new methods that all user-defined InputFormats + must now define. Also, the values returned by TextInputFormat are + no longer of class UTF8, but now of class Text. + + 8. HADOOP-436. Fix an error-handling bug in the web ui. + (Devaraj Das via cutting) + + 9. HADOOP-455. Fix a bug in Text, where DEL was not permitted. + (Hairong Kuang via cutting) + +10. HADOOP-456. Change the DFS namenode to keep a persistent record + of the set of known datanodes. This will be used to implement a + "safe mode" where filesystem changes are prohibited when a + critical percentage of the datanodes are unavailable. + (Konstantin Shvachko via cutting) + +11. HADOOP-322. Add a job control utility. This permits one to + specify job interdependencies. Each job is submitted only after + the jobs it depends on have successfully completed. + (Runping Qi via cutting) + +12. HADOOP-176. Fix a bug in IntWritable.Comparator. + (Dick King via cutting) + +13. HADOOP-421. Replace uses of String in recordio package with Text + class, for improved handling of UTF-8 data. + (Milind Bhandarkar via cutting) + +14. HADOOP-464. Improved error message when job jar not found. + (Michel Tourn via cutting) + +15. HADOOP-469. Fix /bin/bash specifics that have crept into our + /bin/sh scripts since HADOOP-352. + (Jean-Baptiste Quenot via cutting) + +16. HADOOP-468. Add HADOOP_NICENESS environment variable to set + scheduling priority for daemons. (Vetle Roeim via cutting) + +17. HADOOP-473. Fix TextInputFormat to correctly handle more EOL + formats. Things now work correctly with CR, LF or CRLF. + (Dennis Kubes & James White via cutting) + +18. HADOOP-461. Make Java 1.5 an explicit requirement. (cutting) + +19. HADOOP-54. Add block compression to SequenceFile. One may now + specify that blocks of keys and values are compressed together, + improving compression for small keys and values. + SequenceFile.Writer's constructor is now deprecated and replaced + with a factory method. (Arun C Murthy via cutting) + +20. HADOOP-281. Prohibit DFS files that are also directories. + (Wendy Chien via cutting) + +21. HADOOP-486. Add the job username to JobStatus instances returned + by JobClient. (Mahadev Konar via cutting) + +22. HADOOP-437. contrib/streaming: Add support for gzipped inputs. + (Michel Tourn via cutting) + +23. HADOOP-463. Add variable expansion to config files. + Configuration property values may now contain variable + expressions. A variable is referenced with the syntax + '${variable}'. Variables values are found first in the + configuration, and then in Java system properties. The default + configuration is modified so that temporary directories are now + under ${hadoop.tmp.dir}, which is, by default, + /tmp/hadoop-${user.name}. (Michel Tourn via cutting) + +24. HADOOP-419. Fix a NullPointerException finding the ClassLoader + when using libhdfs. (omalley via cutting) + +25. HADOOP-460. Fix contrib/smallJobsBenchmark to use Text instead of + UTF8. (Sanjay Dahiya via cutting) + +26. HADOOP-196. Fix Configuration(Configuration) constructor to work + correctly. (Sami Siren via cutting) + +27. HADOOP-501. Fix Configuration.toString() to handle URL resources. + (Thomas Friol via cutting) + +28. HADOOP-499. Reduce the use of Strings in contrib/streaming, + replacing them with Text for better performance. + (Hairong Kuang via cutting) + +29. HADOOP-64. Manage multiple volumes with a single DataNode. 
+ Previously DataNode would create a separate daemon per configured + volume, each with its own connection to the NameNode. Now all + volumes are handled by a single DataNode daemon, reducing the load + on the NameNode. (Milind Bhandarkar via cutting) + +30. HADOOP-424. Fix MapReduce so that jobs which generate zero splits + do not fail. (Frédéric Bertin via cutting) + +31. HADOOP-408. Adjust some timeouts and remove some others so that + unit tests run faster. (cutting) + +32. HADOOP-507. Fix an IllegalAccessException in DFS. + (omalley via cutting) + +33. HADOOP-320. Fix so that checksum files are correctly copied when + the destination of a file copy is a directory. + (Hairong Kuang via cutting) + +34. HADOOP-286. In DFSClient, avoid pinging the NameNode with + renewLease() calls when no files are being written. + (Konstantin Shvachko via cutting) + +35. HADOOP-312. Close idle IPC connections. All IPC connections were + cached forever. Now, after a connection has been idle for more + than a configurable amount of time (one second by default), the + connection is closed, conserving resources on both client and + server. (Devaraj Das via cutting) + +36. HADOOP-497. Permit the specification of the network interface and + nameserver to be used when determining the local hostname + advertised by datanodes and tasktrackers. + (Lorenzo Thione via cutting) + +37. HADOOP-441. Add a compression codec API and extend SequenceFile + to use it. This will permit the use of alternate compression + codecs in SequenceFile. (Arun C Murthy via cutting) + +38. HADOOP-483. Improvements to libhdfs build and documentation. + (Arun C Murthy via cutting) + +39. HADOOP-458. Fix a memory corruption bug in libhdfs. + (Arun C Murthy via cutting) + +40. HADOOP-517. Fix a contrib/streaming bug in end-of-line detection. + (Hairong Kuang via cutting) + +41. HADOOP-474. Add CompressionCodecFactory, and use it in + TextInputFormat and TextOutputFormat. Compressed input files are + automatically decompressed when they have the correct extension. + Output files will, when output compression is specified, be + generated with an appropriate extension. Also add a gzip codec and + fix problems with UTF8 text inputs. (omalley via cutting) + + +Release 0.5.0 - 2006-08-04 + + 1. HADOOP-352. Fix shell scripts to use /bin/sh instead of + /bin/bash, for better portability. + (Jean-Baptiste Quenot via cutting) + + 2. HADOOP-313. Permit task state to be saved so that single tasks + may be manually re-executed when debugging. (omalley via cutting) + + 3. HADOOP-339. Add method to JobClient API listing jobs that are + not yet complete, i.e., that are queued or running. + (Mahadev Konar via cutting) + + 4. HADOOP-355. Updates to the streaming contrib module, including + API fixes, making reduce optional, and adding an input type for + StreamSequenceRecordReader. (Michel Tourn via cutting) + + 5. HADOOP-358. Fix an NPE bug in Path.equals(). + (Frédéric Bertin via cutting) + + 6. HADOOP-327. Fix ToolBase to not call System.exit() when + exceptions are thrown. (Hairong Kuang via cutting) + + 7. HADOOP-359. Permit map output to be compressed. + (omalley via cutting) + + 8. HADOOP-341. Permit input URI to CopyFiles to use the HTTP + protocol. This lets one, e.g., more easily copy log files into + DFS. (Arun C Murthy via cutting) + + 9. HADOOP-361. Remove unix dependencies from streaming contrib + module tests, making them pure java. (Michel Tourn via cutting) + +10. HADOOP-354. Make public methods to stop DFS daemons.
+ (Barry Kaplan via cutting) + +11. HADOOP-252. Add versioning to RPC protocols. + (Milind Bhandarkar via cutting) + +12. HADOOP-356. Add contrib to "compile" and "test" build targets, so + that this code is better maintained. (Michel Tourn via cutting) + +13. HADOOP-307. Add smallJobsBenchmark contrib module. This runs + lots of small jobs, in order to determine per-task overheads. + (Sanjay Dahiya via cutting) + +14. HADOOP-342. Add a tool for log analysis: Logalyzer. + (Arun C Murthy via cutting) + +15. HADOOP-347. Add web-based browsing of DFS content. The namenode + redirects browsing requests to datanodes. Content requests are + redirected to datanodes where the data is local when possible. + (Devaraj Das via cutting) + +16. HADOOP-351. Make Hadoop IPC kernel independent of Jetty. + (Devaraj Das via cutting) + +17. HADOOP-237. Add metric reporting to DFS and MapReduce. With only + minor configuration changes, one can now monitor many Hadoop + system statistics using Ganglia or other monitoring systems. + (Milind Bhandarkar via cutting) + +18. HADOOP-376. Fix datanode's HTTP server to scan for a free port. + (omalley via cutting) + +19. HADOOP-260. Add --config option to shell scripts, specifying an + alternate configuration directory. (Milind Bhandarkar via cutting) + +20. HADOOP-381. Permit developers to save the temporary files for + tasks whose names match a regular expression, to facilliate + debugging. (omalley via cutting) + +21. HADOOP-344. Fix some Windows-related problems with DF. + (Konstantin Shvachko via cutting) + +22. HADOOP-380. Fix reduce tasks to poll less frequently for map + outputs. (Mahadev Konar via cutting) + +23. HADOOP-321. Refactor DatanodeInfo, in preparation for + HADOOP-306. (Konstantin Shvachko & omalley via cutting) + +24. HADOOP-385. Fix some bugs in record io code generation. + (Milind Bhandarkar via cutting) + +25. HADOOP-302. Add new Text class to replace UTF8, removing + limitations of that class. Also refactor utility methods for + writing zero-compressed integers (VInts and VLongs). + (Hairong Kuang via cutting) + +26. HADOOP-335. Refactor DFS namespace/transaction logging in + namenode. (Konstantin Shvachko via cutting) + +27. HADOOP-375. Fix handling of the datanode HTTP daemon's port so + that multiple datanode's can be run on a single host. + (Devaraj Das via cutting) + +28. HADOOP-386. When removing excess DFS block replicas, remove those + on nodes with the least free space first. + (Johan Oskarson via cutting) + +29. HADOOP-389. Fix intermittent failures of mapreduce unit tests. + Also fix some build dependencies. + (Mahadev & Konstantin via cutting) + +30. HADOOP-362. Fix a problem where jobs hang when status messages + are recieved out-of-order. (omalley via cutting) + +31. HADOOP-394. Change order of DFS shutdown in unit tests to + minimize errors logged. (Konstantin Shvachko via cutting) + +32. HADOOP-396. Make DatanodeID implement Writable. + (Konstantin Shvachko via cutting) + +33. HADOOP-377. Permit one to add URL resources to a Configuration. + (Jean-Baptiste Quenot via cutting) + +34. HADOOP-345. Permit iteration over Configuration key/value pairs. + (Michel Tourn via cutting) + +35. HADOOP-409. Streaming contrib module: make configuration + properties available to commands as environment variables. + (Michel Tourn via cutting) + +36. HADOOP-369. Add -getmerge option to dfs command that appends all + files in a directory into a single local file. + (Johan Oskarson via cutting) + +37. HADOOP-410. 
Replace some TreeMaps with HashMaps in DFS, for + a 17% performance improvement. (Milind Bhandarkar via cutting) + +38. HADOOP-411. Add unit tests for command line parser. + (Hairong Kuang via cutting) + +39. HADOOP-412. Add MapReduce input formats that support filtering + of SequenceFile data, including sampling and regex matching. + Also, move JobConf.newInstance() to a new utility class. + (Hairong Kuang via cutting) + +40. HADOOP-226. Fix fsck command to properly consider replication + counts, now that these can vary per file. (Bryan Pendleton via cutting) + +41. HADOOP-425. Add a Python MapReduce example, using Jython. + (omalley via cutting) + + +Release 0.4.0 - 2006-06-28 + + 1. HADOOP-298. Improved progress reports for CopyFiles utility, the + distributed file copier. (omalley via cutting) + + 2. HADOOP-299. Fix the task tracker, permitting multiple jobs to + more easily execute at the same time. (omalley via cutting) + + 3. HADOOP-250. Add an HTTP user interface to the namenode, running + on port 50070. (Devaraj Das via cutting) + + 4. HADOOP-123. Add MapReduce unit tests that run a jobtracker and + tasktracker, greatly increasing code coverage. + (Milind Bhandarkar via cutting) + + 5. HADOOP-271. Add links from jobtracker's web ui to tasktracker's + web ui. Also attempt to log a thread dump of child processes + before they're killed. (omalley via cutting) + + 6. HADOOP-210. Change RPC server to use a selector instead of a + thread per connection. This should make it easier to scale to + larger clusters. Note that this incompatibly changes the RPC + protocol: clients and servers must both be upgraded to the new + version to ensure correct operation. (Devaraj Das via cutting) + + 7. HADOOP-311. Change DFS client to retry failed reads, so that a + single read failure will not alone cause failure of a task. + (omalley via cutting) + + 8. HADOOP-314. Remove the "append" phase when reducing. Map output + files are now directly passed to the sorter, without first + appending them into a single file. Now, the first third of reduce + progress is "copy" (transferring map output to reduce nodes), the + middle third is "sort" (sorting map output) and the last third is + "reduce" (generating output). Long-term, the "sort" phase will + also be removed. (omalley via cutting) + + 9. HADOOP-316. Fix a potential deadlock in the jobtracker. + (omalley via cutting) + +10. HADOOP-319. Fix FileSystem.close() to remove the FileSystem + instance from the cache. (Hairong Kuang via cutting) + +11. HADOOP-135. Fix potential deadlock in JobTracker by acquiring + locks in a consistent order. (omalley via cutting) + +12. HADOOP-278. Check for existence of input directories before + starting MapReduce jobs, making it easier to debug this common + error. (omalley via cutting) + +13. HADOOP-304. Improve error message for + UnregisterdDatanodeException to include expected node name. + (Konstantin Shvachko via cutting) + +14. HADOOP-305. Fix TaskTracker to ask for new tasks as soon as a + task is finished, rather than waiting for the next heartbeat. + This improves performance when tasks are short. + (Mahadev Konar via cutting) + +15. HADOOP-59. Add support for generic command line options. One may + now specify the filesystem (-fs), the MapReduce jobtracker (-jt), + a config file (-conf) or any configuration property (-D). The + "dfs", "fsck", "job", and "distcp" commands currently support + this, with more to be added. (Hairong Kuang via cutting) + +16. HADOOP-296. 
Permit specification of the amount of reserved space + on a DFS datanode. One may specify both the percentage free and + the number of bytes. (Johan Oskarson via cutting) + +17. HADOOP-325. Fix a problem initializing RPC parameter classes, and + remove the workaround used to initialize classes. + (omalley via cutting) + +18. HADOOP-328. Add an option to the "distcp" command to ignore read + errors while copying. (omalley via cutting) + +19. HADOOP-27. Don't allocate tasks to trackers whose local free + space is too low. (Johan Oskarson via cutting) + +20. HADOOP-318. Keep slow DFS output from causing task timeouts. + This incompatibly changes some public interfaces, adding a + parameter to OutputFormat.getRecordWriter() and the new method + Reporter.progress(), but it makes lots of tasks succeed that were + previously failing. (Milind Bhandarkar via cutting) + + +Release 0.3.2 - 2006-06-09 + + 1. HADOOP-275. Update the streaming contrib module to use log4j for + its logging. (Michel Tourn via cutting) + + 2. HADOOP-279. Provide defaults for log4j logging parameters, so + that things still work reasonably when Hadoop-specific system + properties are not provided. (omalley via cutting) + + 3. HADOOP-280. Fix a typo in AllTestDriver which caused the wrong + test to be run when "DistributedFSCheck" was specified. + (Konstantin Shvachko via cutting) + + 4. HADOOP-240. DFS's mkdirs() implementation no longer logs a warning + when the directory already exists. (Hairong Kuang via cutting) + + 5. HADOOP-285. Fix DFS datanodes to be able to re-join the cluster + after the connection to the namenode is lost. (omalley via cutting) + + 6. HADOOP-277. Fix a race condition when creating directories. + (Sameer Paranjpye via cutting) + + 7. HADOOP-289. Improved exception handling in DFS datanode. + (Konstantin Shvachko via cutting) + + 8. HADOOP-292. Fix client-side logging to go to standard error + rather than standard output, so that it can be distinguished from + application output. (omalley via cutting) + + 9. HADOOP-294. Fixed bug where conditions for retrying after errors + in the DFS client were reversed. (omalley via cutting) + + +Release 0.3.1 - 2006-06-05 + + 1. HADOOP-272. Fix a bug in bin/hadoop setting log + parameters. (omalley & cutting) + + 2. HADOOP-274. Change applications to log to standard output rather + than to a rolling log file like daemons. (omalley via cutting) + + 3. HADOOP-262. Fix reduce tasks to report progress while they're + waiting for map outputs, so that they do not time out. + (Mahadev Konar via cutting) + + 4. HADOOP-245 and HADOOP-246. Improvements to record io package. + (Mahadev Konar via cutting) + + 5. HADOOP-276. Add logging config files to jar file so that they're + always found. (omalley via cutting) + + +Release 0.3.0 - 2006-06-02 + + 1. HADOOP-208. Enhance MapReduce web interface, adding new pages + for failed tasks, and tasktrackers. (omalley via cutting) + + 2. HADOOP-204. Tweaks to metrics package. (David Bowen via cutting) + + 3. HADOOP-209. Add a MapReduce-based file copier. This will + copy files within or between file systems in parallel. + (Milind Bhandarkar via cutting) + + 4. HADOOP-146. Fix DFS to check when randomly generating a new block + id that no existing blocks already have that id. + (Milind Bhandarkar via cutting) + + 5. HADOOP-180. Make a daemon thread that does the actual task clean ups, so + that the main offerService thread in the taskTracker doesn't get stuck + and miss his heartbeat window. 
This was killing many task trackers as + big jobs finished (300+ tasks / node). (omalley via cutting) + + 6. HADOOP-200. Avoid transmitting entire list of map task names to + reduce tasks. Instead just transmit the number of map tasks and + henceforth refer to them by number when collecting map output. + (omalley via cutting) + + 7. HADOOP-219. Fix a NullPointerException when handling a checksum + exception under SequenceFile.Sorter.sort(). (cutting & stack) + + 8. HADOOP-212. Permit alteration of the file block size in DFS. The + default block size for new files may now be specified in the + configuration with the dfs.block.size property. The block size + may also be specified when files are opened. + (omalley via cutting) + + 9. HADOOP-218. Avoid accessing configuration while looping through + tasks in JobTracker. (Mahadev Konar via cutting) + +10. HADOOP-161. Add hashCode() method to DFS's Block. + (Milind Bhandarkar via cutting) + +11. HADOOP-115. Map output types may now be specified. These are also + used as reduce input types, thus permitting reduce input types to + differ from reduce output types. (Runping Qi via cutting) + +12. HADOOP-216. Add task progress to task status page. + (Bryan Pendelton via cutting) + +13. HADOOP-233. Add web server to task tracker that shows running + tasks and logs. Also add log access to job tracker web interface. + (omalley via cutting) + +14. HADOOP-205. Incorporate pending tasks into tasktracker load + calculations. (Mahadev Konar via cutting) + +15. HADOOP-247. Fix sort progress to better handle exceptions. + (Mahadev Konar via cutting) + +16. HADOOP-195. Improve performance of the transfer of map outputs to + reduce nodes by performing multiple transfers in parallel, each on + a separate socket. (Sameer Paranjpye via cutting) + +17. HADOOP-251. Fix task processes to be tolerant of failed progress + reports to their parent process. (omalley via cutting) + +18. HADOOP-325. Improve the FileNotFound exceptions thrown by + LocalFileSystem to include the name of the file. + (Benjamin Reed via cutting) + +19. HADOOP-254. Use HTTP to transfer map output data to reduce + nodes. This, together with HADOOP-195, greatly improves the + performance of these transfers. (omalley via cutting) + +20. HADOOP-163. Cause datanodes that\ are unable to either read or + write data to exit, so that the namenode will no longer target + them for new blocks and will replicate their data on other nodes. + (Hairong Kuang via cutting) + +21. HADOOP-222. Add a -setrep option to the dfs commands that alters + file replication levels. (Johan Oskarson via cutting) + +22. HADOOP-75. In DFS, only check for a complete file when the file + is closed, rather than as each block is written. + (Milind Bhandarkar via cutting) + +23. HADOOP-124. Change DFS so that datanodes are identified by a + persistent ID rather than by host and port. This solves a number + of filesystem integrity problems, when, e.g., datanodes are + restarted. (Konstantin Shvachko via cutting) + +24. HADOOP-256. Add a C API for DFS. (Arun C Murthy via cutting) + +25. HADOOP-211. Switch to use the Jakarta Commons logging internally, + configured to use log4j by default. (Arun C Murthy and cutting) + +26. HADOOP-265. Tasktracker now fails to start if it does not have a + writable local directory for temporary files. In this case, it + logs a message to the JobTracker and exits. (Hairong Kuang via cutting) + +27. HADOOP-270. Fix potential deadlock in datanode shutdown. 
+ (Hairong Kuang via cutting) + +Release 0.2.1 - 2006-05-12 + + 1. HADOOP-199. Fix reduce progress (broken by HADOOP-182). + (omalley via cutting) + + 2. HADOOP-201. Fix 'bin/hadoop dfs -report'. (cutting) + + 3. HADOOP-207. Fix JDK 1.4 incompatibility introduced by HADOOP-96. + System.getenv() does not work in JDK 1.4. (Hairong Kuang via cutting) + + +Release 0.2.0 - 2006-05-05 + + 1. Fix HADOOP-126. 'bin/hadoop dfs -cp' now correctly copies .crc + files. (Konstantin Shvachko via cutting) + + 2. Fix HADOOP-51. Change DFS to support per-file replication counts. + (Konstantin Shvachko via cutting) + + 3. Fix HADOOP-131. Add scripts to start/stop dfs and mapred daemons. + Use these in start/stop-all scripts. (Chris Mattmann via cutting) + + 4. Stop using ssh options by default that are not yet in widely used + versions of ssh. Folks can still enable their use by uncommenting + a line in conf/hadoop-env.sh. (cutting) + + 5. Fix HADOOP-92. Show information about all attempts to run each + task in the web ui. (Mahadev konar via cutting) + + 6. Fix HADOOP-128. Improved DFS error handling. (Owen O'Malley via cutting) + + 7. Fix HADOOP-129. Replace uses of java.io.File with new class named + Path. This fixes bugs where java.io.File methods were called + directly when FileSystem methods were desired, and reduces the + likelihood of such bugs in the future. It also makes the handling + of pathnames more consistent between local and dfs FileSystems and + between Windows and Unix. java.io.File-based methods are still + available for back-compatibility, but are deprecated and will be + removed once 0.2 is released. (cutting) + + 8. Change dfs.data.dir and mapred.local.dir to be comma-separated + lists of directories, no longer be space-separated. This fixes + several bugs on Windows. (cutting) + + 9. Fix HADOOP-144. Use mapred task id for dfs client id, to + facilitate debugging. (omalley via cutting) + +10. Fix HADOOP-143. Do not line-wrap stack-traces in web ui. + (omalley via cutting) + +11. Fix HADOOP-118. In DFS, improve clean up of abandoned file + creations. (omalley via cutting) + +12. Fix HADOOP-138. Stop multiple tasks in a single heartbeat, rather + than one per heartbeat. (Stefan via cutting) + +13. Fix HADOOP-139. Remove a potential deadlock in + LocalFileSystem.lock(). (Igor Bolotin via cutting) + +14. Fix HADOOP-134. Don't hang jobs when the tasktracker is + misconfigured to use an un-writable local directory. (omalley via cutting) + +15. Fix HADOOP-115. Correct an error message. (Stack via cutting) + +16. Fix HADOOP-133. Retry pings from child to parent, in case of + (local) communcation problems. Also log exit status, so that one + can distinguish patricide from other deaths. (omalley via cutting) + +17. Fix HADOOP-142. Avoid re-running a task on a host where it has + previously failed. (omalley via cutting) + +18. Fix HADOOP-148. Maintain a task failure count for each + tasktracker and display it in the web ui. (omalley via cutting) + +19. Fix HADOOP-151. Close a potential socket leak, where new IPC + connection pools were created per configuration instance that RPCs + use. Now a global RPC connection pool is used again, as + originally intended. (cutting) + +20. Fix HADOOP-69. Don't throw a NullPointerException when getting + hints for non-existing file split. (Bryan Pendelton via cutting) + +21. Fix HADOOP-157. When a task that writes dfs files (e.g., a reduce + task) failed and was retried, it would fail again and again, + eventually failing the job. 
The problem was that dfs did not yet
+ know that the failed task had abandoned the files, and would not
+ yet let another task create files with the same names. Dfs now
+ retries when creating a file long enough for locks on abandoned
+ files to expire. (omalley via cutting)
+
+22. Fix HADOOP-150. Improved task names that include job
+ names. (omalley via cutting)
+
+23. Fix HADOOP-162. Fix ConcurrentModificationException when
+ releasing file locks. (omalley via cutting)
+
+24. Fix HADOOP-132. Initial check-in of new Metrics API, including
+ implementations for writing metric data to a file and for sending
+ it to Ganglia. (David Bowen via cutting)
+
+25. Fix HADOOP-160. Remove some unneeded synchronization around
+ time-consuming operations in the TaskTracker. (omalley via cutting)
+
+26. Fix HADOOP-166. RPCs failed when passed subclasses of a declared
+ parameter type. This is fixed by changing ObjectWritable to store
+ both the declared type and the instance type for Writables. Note
+ that this incompatibly changes the format of ObjectWritable and
+ will render unreadable any ObjectWritables stored in files.
+ Nutch only uses ObjectWritable in intermediate files, so this
+ should not be a problem for Nutch. (Stefan & cutting)
+
+27. Fix HADOOP-168. MapReduce RPC protocol methods should all declare
+ IOException, so that timeouts are handled appropriately.
+ (omalley via cutting)
+
+28. Fix HADOOP-169. Don't fail a reduce task if a call to the
+ jobtracker to locate map outputs fails. (omalley via cutting)
+
+29. Fix HADOOP-170. Permit FileSystem clients to examine and modify
+ the replication count of individual files. Also fix a few
+ replication-related bugs. (Konstantin Shvachko via cutting)
+
+30. Permit specification of a higher replication level for job
+ submission files (job.xml and job.jar). This helps with large
+ clusters, since these files are read by every node. (cutting)
+
+31. HADOOP-173. Optimize allocation of tasks with local data. (cutting)
+
+32. HADOOP-167. Reduce number of Configurations and JobConf's
+ created. (omalley via cutting)
+
+33. NUTCH-256. Change FileSystem#createNewFile() to create a .crc
+ file. The lack of a .crc file was causing warnings. (cutting)
+
+34. HADOOP-174. Change JobClient to not abort job until it has failed
+ to contact the job tracker for five attempts, not just one as
+ before. (omalley via cutting)
+
+35. HADOOP-177. Change MapReduce web interface to page through tasks.
+ Previously, when jobs had more than a few thousand tasks they
+ could crash web browsers. (Mahadev Konar via cutting)
+
+36. HADOOP-178. In DFS, piggyback blockwork requests from datanodes
+ on heartbeat responses from namenode. This reduces the volume of
+ RPC traffic. Also move startup delay in blockwork from datanode
+ to namenode. This fixes a problem where restarting the namenode
+ triggered a lot of unneeded replication. (Hairong Kuang via cutting)
+
+37. HADOOP-183. If the DFS namenode is restarted with different
+ minimum and/or maximum replication counts, existing files'
+ replication counts are now automatically adjusted to be within the
+ newly configured bounds. (Hairong Kuang via cutting)
+
+38. HADOOP-186. Better error handling in TaskTracker's top-level
+ loop. Also improve calculation of time to send next heartbeat.
+ (omalley via cutting)
+
+39. HADOOP-187. Add two MapReduce examples/benchmarks. One creates
+ files containing random data. The second sorts the output of the
+ first. (omalley via cutting)
+
+40. HADOOP-185.
Fix so that, when a task tracker times out making the + RPC asking for a new task to run, the job tracker does not think + that it is actually running the task returned. (omalley via cutting) + +41. HADOOP-190. If a child process hangs after it has reported + completion, its output should not be lost. (Stack via cutting) + +42. HADOOP-184. Re-structure some test code to better support testing + on a cluster. (Mahadev Konar via cutting) + +43. HADOOP-191 Add streaming package, Hadoop's first contrib module. + This permits folks to easily submit MapReduce jobs whose map and + reduce functions are implemented by shell commands. Use + 'bin/hadoop jar build/hadoop-streaming.jar' to get details. + (Michel Tourn via cutting) + +44. HADOOP-189. Fix MapReduce in standalone configuration to + correctly handle job jar files that contain a lib directory with + nested jar files. (cutting) + +45. HADOOP-65. Initial version of record I/O framework that enables + the specification of record types and generates marshalling code + in both Java and C++. Generated Java code implements + WritableComparable, but is not yet otherwise used by + Hadoop. (Milind Bhandarkar via cutting) + +46. HADOOP-193. Add a MapReduce-based FileSystem benchmark. + (Konstantin Shvachko via cutting) + +47. HADOOP-194. Add a MapReduce-based FileSystem checker. This reads + every block in every file in the filesystem. (Konstantin Shvachko + via cutting) + +48. HADOOP-182. Fix so that lost task trackers to not change the + status of reduce tasks or completed jobs. Also fixes the progress + meter so that failed tasks are subtracted. (omalley via cutting) + +49. HADOOP-96. Logging improvements. Log files are now separate from + standard output and standard error files. Logs are now rolled. + Logging of all DFS state changes can be enabled, to facilitate + debugging. (Hairong Kuang via cutting) + + +Release 0.1.1 - 2006-04-08 + + 1. Added CHANGES.txt, logging all significant changes to Hadoop. (cutting) + + 2. Fix MapReduceBase.close() to throw IOException, as declared in the + Closeable interface. This permits subclasses which override this + method to throw that exception. (cutting) + + 3. Fix HADOOP-117. Pathnames were mistakenly transposed in + JobConf.getLocalFile() causing many mapred temporary files to not + be removed. (Raghavendra Prabhu via cutting) + + 4. Fix HADOOP-116. Clean up job submission files when jobs complete. + (cutting) + + 5. Fix HADOOP-125. Fix handling of absolute paths on Windows (cutting) + +Release 0.1.0 - 2006-04-01 + + 1. The first release of Hadoop. + diff --git a/aarch64/share/doc/hadoop/common/LICENSE.txt b/aarch64/share/doc/hadoop/common/LICENSE.txt new file mode 100644 index 0000000..6ccfd09 --- /dev/null +++ b/aarch64/share/doc/hadoop/common/LICENSE.txt @@ -0,0 +1,284 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +For portions of the native implementation of slicing-by-8 CRC calculation +in src/main/native/src/org/apache/hadoop/util: + +/** + * Copyright 2008,2009,2010 Massachusetts Institute of Technology. + * All rights reserved. Use of this source code is governed by a + * BSD-style license that can be found in the LICENSE file. + */ + + For src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c: + +/* + LZ4 - Fast LZ compression algorithm + Copyright (C) 2011, Yann Collet. + BSD License + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ diff --git a/aarch64/share/doc/hadoop/common/NOTICE.txt b/aarch64/share/doc/hadoop/common/NOTICE.txt new file mode 100644 index 0000000..62fc581 --- /dev/null +++ b/aarch64/share/doc/hadoop/common/NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). diff --git a/aarch64/share/doc/hadoop/common/README.txt b/aarch64/share/doc/hadoop/common/README.txt new file mode 100644 index 0000000..148cd31 --- /dev/null +++ b/aarch64/share/doc/hadoop/common/README.txt @@ -0,0 +1,31 @@ +For the latest information about Hadoop, please visit our website at: + + http://hadoop.apache.org/core/ + +and our wiki, at: + + http://wiki.apache.org/hadoop/ + +This distribution includes cryptographic software. The country in +which you currently reside may have restrictions on the import, +possession, use, and/or re-export to another country, of +encryption software. BEFORE using any encryption software, please +check your country's laws, regulations and policies concerning the +import, possession, or use, and re-export of encryption software, to +see if this is permitted. See for more +information. + +The U.S. Government Department of Commerce, Bureau of Industry and +Security (BIS), has classified this software as Export Commodity +Control Number (ECCN) 5D002.C.1, which includes information security +software using or performing cryptographic functions with asymmetric +algorithms. The form and manner of this Apache Software Foundation +distribution makes it eligible for export under the License Exception +ENC Technology Software Unrestricted (TSU) exception (see the BIS +Export Administration Regulations, Section 740.13) for both object +code and source code. + +The following provides more details on the included cryptographic +software: + Hadoop Core uses the SSL libraries from the Jetty project written +by mortbay.org. diff --git a/aarch64/share/doc/hadoop/hdfs/CHANGES.txt b/aarch64/share/doc/hadoop/hdfs/CHANGES.txt new file mode 100644 index 0000000..6c385fb --- /dev/null +++ b/aarch64/share/doc/hadoop/hdfs/CHANGES.txt @@ -0,0 +1,6945 @@ +Hadoop HDFS Change Log + +Release 2.2.0 - 2013-10-13 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + HDFS-5230. Introduce RpcInfo to decouple XDR classes from the RPC API. + (Haohui Mai via brandonli) + + IMPROVEMENTS + + HDFS-5246. Make Hadoop nfs server port and mount daemon port + configurable. (Jinghui Wang via brandonli) + + HDFS-5256. Use guava LoadingCache to implement DFSClientCache. (Haohui Mai + via brandonli) + + HDFS-5308. Replace HttpConfig#getSchemePrefix with implicit schemes in HDFS + JSP. (Haohui Mai via jing9) + + OPTIMIZATIONS + + BUG FIXES + + HDFS-5139. Remove redundant -R option from setrep. + + HDFS-5251. Race between the initialization of NameNode and the http + server. (Haohui Mai via suresh) + + HDFS-5258. Skip tests in TestHDFSCLI that are not applicable on Windows. + (Chuan Liu via cnauroth) + + HDFS-5186. 
TestFileJournalManager fails on Windows due to file handle leaks. + (Chuan Liu via cnauroth) + + HDFS-5031. BlockScanner scans the block multiple times. (Vinay via Arpit + Agarwal) + + HDFS-5268. NFS write commit verifier is not set in a few places (brandonli) + + HDFS-5265. Namenode fails to start when dfs.https.port is unspecified. + (Haohui Mai via jing9) + + HDFS-5255. Distcp job fails with hsftp when https is enabled in insecure + cluster. (Arpit Agarwal) + + HDFS-5279. Guard against NullPointerException in NameNode JSP pages before + initialization of FSNamesystem. (cnauroth) + + HDFS-5289. Race condition in TestRetryCacheWithHA#testCreateSymlink causes + spurious test failure. (atm) + + HDFS-5300. FSNameSystem#deleteSnapshot() should not check owner in case of + permissions disabled. (Vinay via jing9) + + HDFS-5306. Datanode https port is not available at the namenode. (Suresh + Srinivas via brandonli) + + HDFS-5299. DFS client hangs in updatePipeline RPC when failover happened. + (Vinay via jing9) + + HDFS-5259. Support client which combines appended data with old data + before sends it to NFS server. (brandonli) + +Release 2.1.1-beta - 2013-09-23 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + HDFS-4962 Use enum for nfs constants (Nicholas SZE via jing9) + + HDFS-5071 Change hdfs-nfs parent project to hadoop-project (brandonli) + + HDFS-4763 Add script changes/utility for starting NFS gateway (brandonli) + + HDFS-5076 Add MXBean methods to query NN's transaction information and + JournalNode's journal status. (jing9) + + HDFS-5104 Support dotdot name in NFS LOOKUP operation (brandonli) + + HDFS-5107 Fix array copy error in Readdir and Readdirplus responses + (brandonli) + + HDFS-5110 Change FSDataOutputStream to HdfsDataOutputStream for opened + streams to fix type cast error. (brandonli) + + HDFS-5069 Include hadoop-nfs and hadoop-hdfs-nfs into hadoop dist for + NFS deployment (brandonli) + + HDFS-4947 Add NFS server export table to control export by hostname or + IP range (Jing Zhao via brandonli) + + HDFS-5078 Support file append in NFSv3 gateway to enable data streaming + to HDFS (brandonli) + + HDFS-5136 MNT EXPORT should give the full group list which can mount the + exports (brandonli) + + HDFS-5118. Provide testing support for DFSClient to drop RPC responses. + (jing9) + + HDFS-5085. Refactor o.a.h.nfs to support different types of + authentications. (jing9) + + HDFS-5067 Support symlink operations in NFS gateway. (brandonli) + + HDFS-5199 Add more debug trace for NFS READ and WRITE. (brandonli) + + HDFS-5234 Move RpcFrameDecoder out of the public API. + (Haohui Mai via brandonli) + + IMPROVEMENTS + + HDFS-4513. Clarify in the WebHDFS REST API that all JSON respsonses may + contain additional properties. (szetszwo) + + HDFS-5061. Make FSNameSystem#auditLoggers an unmodifiable list. + (Arpit Agarwal via suresh) + + HDFS-4905. Add appendToFile command to "hdfs dfs". (Arpit Agarwal via + cnauroth) + + HDFS-4926. Namenode webserver's page has a tooltip that is inconsistent + with the datanode HTML link. (Vivek Ganesan via jing9) + + HDFS-5047. Supress logging of full stack trace of quota and lease + exceptions. (Robert Parker via kihwal) + + HDFS-5111. Remove duplicated error message for snapshot commands when + processing invalid arguments. (jing9) + + HDFS-5045. Add more unit tests for retry cache to cover all AtMostOnce + methods. (jing9) + + HDFS-3245. Add metrics and web UI for cluster version summary. (Ravi + Prakash via kihwal) + + HDFS-5128. 
Allow multiple net interfaces to be used with HA namenode RPC + server. (kihwal) + + HDFS-5150. Allow per NN SPN for internal SPNEGO. (kihwal) + + HDFS-4680. Audit logging of delegation tokens for MR tracing. (Andrew Wang) + + HDFS-5212. Refactor RpcMessage and NFS3Response to support different + types of authentication information. (jing9) + + HDFS-4971. Move IO operations out of locking in OpenFileCtx. (brandonli and + jing9) + + OPTIMIZATIONS + + BUG FIXES + + HDFS-5043. For HdfsFileStatus, set default value of childrenNum to -1 + instead of 0 to avoid confusing applications. (brandonli) + + HDFS-5028. LeaseRenewer throws ConcurrentModificationException when timeout. + (zhaoyunjiong via szetszwo) + + HDFS-4993. Fsck can fail if a file is renamed or deleted. (Robert Parker + via kihwal) + + HDFS-5091. Support for spnego keytab separate from the JournalNode keytab + for secure HA. (jing9) + + HDFS-5051. nn fails to download checkpointed image from snn in some + setups. (Vinay and suresh via suresh) + + HDFS-4898. BlockPlacementPolicyWithNodeGroup.chooseRemoteRack() fails to + properly fallback to local rack. (szetszwo) + + HDFS-4632. globStatus using backslash for escaping does not work on Windows. + (Chuan Liu via cnauroth) + + HDFS-5080. BootstrapStandby not working with QJM when the existing NN is + active. (jing9) + + HDFS-5099. Namenode#copyEditLogSegmentsToSharedDir should close + EditLogInputStreams upon finishing. (Chuan Liu via cnauroth) + + HDFS-2994. If lease soft limit is recovered successfully + the append can fail. (Tao Luo via shv) + + HDFS-5100. TestNamenodeRetryCache fails on Windows due to incorrect cleanup. + (Chuan Liu via cnauroth) + + HDFS-5103. TestDirectoryScanner fails on Windows. (Chuan Liu via cnauroth) + + HDFS-5102. Snapshot names should not be allowed to contain slash characters. + (jing9) + + HDFS-5105. TestFsck fails on Windows. (Chuan Liu via arp) + + HDFS-5106. TestDatanodeBlockScanner fails on Windows due to incorrect path + format. (Chuan Liu via cnauroth) + + HDFS-4594. WebHDFS open sets Content-Length header to what is specified by + length parameter rather than how much data is actually returned. (cnauroth) + + HDFS-5124. DelegationTokenSecretManager#retrievePassword can cause deadlock + in NameNode. (Daryn Sharp via jing9) + + HDFS-5132. Deadlock in NameNode between SafeModeMonitor#run and + DatanodeManager#handleHeartbeat. (kihwal) + + HDFS-5077. NPE in FSNamesystem.commitBlockSynchronization(). + (Plamen Jeliazkov via shv) + + HDFS-5140. Too many safemode monitor threads being created in the standby + namenode causing it to fail with out of memory error. (jing9) + + HDFS-5159. Secondary NameNode fails to checkpoint if error occurs + downloading edits on first checkpoint. (atm) + + HDFS-5192. NameNode may fail to start when + dfs.client.test.drop.namenode.response.number is set. (jing9) + + HDFS-5219. Add configuration keys for retry policy in WebHDFSFileSystem. + (Haohui Mai via jing9) + + HDFS-5231. Fix broken links in the document of HDFS Federation. (Haohui Mai + via jing9) + + HDFS-5249. Fix dumper thread which may die silently. (brandonli) + +Release 2.1.0-beta - 2013-08-22 + + INCOMPATIBLE CHANGES + + HDFS-4053. Increase the default block size. (eli) + + HDFS-4305. Add a configurable limit on number of blocks per file, and min + block size. (Andrew Wang via atm) + + HDFS-4434. Provide a mapping from INodeId to INode. (suresh) + + HDFS-2802. Add HDFS Snapshot feature. 
(See breakdown of tasks below for + subtasks and contributors) + + HDFS-4866. Protocol buffer support cannot compile under C. (Arpit Agarwal via + cnauroth) + + HDFS-5083. Update the HDFS compatibility version range. (kihwal) + + NEW FEATURES + + HDFS-1804. Add a new block-volume device choosing policy that looks at + free space. (atm) + + HDFS-4296. Reserve layout version for release 1.2.0. (suresh) + + HDFS-4334. Add a unique id to INode. (Brandon Li via szetszwo) + + HDFS-4339. Persist inode id in fsimage and editlog. (Brandon Li via + suresh) + + HDFS-4340. Update addBlock() to inculde inode id as additional argument. + (Brandon Li via suresh) + + HDFS-4502. JsonUtil.toFileStatus(..) should check if the fileId property + exists. (Brandon Li via suresh) + + HDFS-2576. Enhances the DistributedFileSystem's create API so that clients + can specify favored datanodes for a file's blocks. (ddas) + + HDFS-347. DFS read performance suboptimal when client co-located on nodes + with data. (Colin Patrick McCabe via todd and atm) + + HADOOP-8562. Enhancements to support Hadoop on Windows Server and Windows + Azure environments. (See breakdown of tasks below for subtasks and + contributors) + + HDFS-3601. Add BlockPlacementPolicyWithNodeGroup to support block placement + with 4-layer network topology. (Junping Du via szetszwo) + + HDFS-3495. Update Balancer to support new NetworkTopology with NodeGroup. + (Junping Du via szetszwo) + + HDFS-4659 Support setting execution bit for regular files (Brandon Li via sanjay) + + HDFS-4762 Provide HDFS based NFSv3 and Mountd implementation (brandonli) + + HDFS-4372. Track NameNode startup progress. (cnauroth) + + HDFS-4373. Add HTTP API for querying NameNode startup progress. (cnauroth) + + HDFS-4374. Display NameNode startup progress in UI. (cnauroth) + + HDFS-4974. Add Idempotent and AtMostOnce annotations to namenode + protocol methods. (suresh) + + HDFS-4979. Implement retry cache on Namenode. (suresh) + + HDFS-5025. Record ClientId and CallId in EditLog to enable rebuilding + retry cache in case of HA failover. (Jing Zhao via suresh) + + IMPROVEMENTS + + HDFS-4461. DirectoryScanner: volume path prefix takes up memory for every + block that is scanned (Colin Patrick McCabe) + + HDFS-4222. NN is unresponsive and loses heartbeats from DNs when + configured to use LDAP and LDAP has issues. (Xiaobo Peng, suresh) + + HDFS-4304. Make FSEditLogOp.MAX_OP_SIZE configurable. (Colin Patrick + McCabe via atm) + + HDFS-4518. Finer grained metrics for HDFS capacity. + (Arpit Agarwal via suresh) + + HDFS-4519. Support overriding jsvc binary and log file locations + when launching secure datanode. (Chris Nauroth via suresh) + + HDFS-4569. Small image transfer related cleanups. + (Andrew Wang via suresh) + + HDFS-4521. Invalid network toploogies should not be cached. (Colin Patrick + McCabe via atm) + + HDFS-4246. The exclude node list should be more forgiving, for each output + stream. (harsh via atm) + + HDFS-4635. Move BlockManager#computeCapacity to LightWeightGSet. (suresh) + + HDFS-4621. Additional logging to help diagnose slow QJM syncs. (todd) + + HDFS-4618. Default transaction interval for checkpoints is too low. (todd) + + HDFS-4525. Provide an API for knowing that whether file is closed or not. + (SreeHari via umamahesh) + + HDFS-3940. Add Gset#clear method and clear the block map when namenode is + shutdown. (suresh) + + HDFS-4679. Namenode operation checks should be done in a consistent + manner. (suresh) + + HDFS-4693. 
Some test cases in TestCheckpoint do not clean up after + themselves. (Arpit Agarwal, suresh via suresh) + + HDFS-3817. Avoid printing SafeModeException stack trace. + (Brandon Li via suresh) + + HDFS-4124. Refactor INodeDirectory#getExistingPathINodes() to enable + returning more than INode array. (Jing Zhao via suresh) + + HDFS-4151. Change the methods in FSDirectory to pass INodesInPath instead + of INode[] as a parameter. (szetszwo) + + HDFS-4129. Add utility methods to dump NameNode in memory tree for + testing. (szetszwo via suresh) + + HDFS-4152. Add a new class BlocksMapUpdateInfo for the parameter in + INode.collectSubtreeBlocksAndClear(..). (Jing Zhao via szetszwo) + + HDFS-4206. Change the fields in INode and its subclasses to private. + (szetszwo) + + HDFS-4215. Remove locking from addToParent(..) since it is used in image + loading, and add INode.isFile(). (szetszwo) + + HDFS-4243. When replacing an INodeDirectory, the parent pointers of the + children of the child have to be updated to the new child. (Jing Zhao + via szetszwo) + + HDFS-4209. Clean up the addNode/addChild/addChildNoQuotaCheck methods in + FSDirectory and INodeDirectory. (szetszwo) + + HDFS-4346. Add SequentialNumber as a base class for INodeId and + GenerationStamp. (szetszwo) + + HDFS-4721. Speed up lease recovery by avoiding stale datanodes and choosing + the datanode with the most recent heartbeat as the primary. (Varun Sharma + via szetszwo) + + HDFS-4804. WARN when users set the block balanced preference percent below + 0.5 or above 1.0. (Stephen Chu via atm) + + HDFS-4698. Provide client-side metrics for remote reads, local reads, and + short-circuit reads. (Colin Patrick McCabe via atm) + + HDFS-3498. Support replica removal in BlockPlacementPolicy and make + BlockPlacementPolicyDefault extensible for reusing code in subclasses. + (Junping Du via szetszwo) + + HDFS-4234. Use generic code for choosing datanode in Balancer. (szetszwo) + + HDFS-4880. Print the image and edits file loaded by the namenode in the + logs. (Arpit Agarwal via suresh) + + HDFS-2572. Remove unnecessary double-check in DN#getHostName. (harsh) + + HDFS-2857. Cleanup BlockInfo class. (suresh) + + HDFS-3009. Remove duplicate code in DFSClient#isLocalAddress by using + NetUtils. (Hari Mankude via suresh) + + HDFS-4914. Use DFSClient.Conf instead of Configuration. (szetszwo) + + HDFS-4883. complete() should verify fileId. (Tao Luo via shv) + + HDFS-4772. Add number of children in HdfsFileStatus. (brandonli) + + HDFS-4932. Avoid a wide line on the name node webUI if we have more Journal + nodes. (Fengdong Yu via cnauroth) + + HDFS-4908. Reduce snapshot inode memory usage. (szetszwo) + + HDFS-4645. Move from randomly generated block ID to sequentially generated + block ID. (Arpit Agarwal via szetszwo) + + HDFS-4912. Cleanup FSNamesystem#startFileInternal. (suresh) + + HDFS-4903. Print trash configuration and trash emptier state in + namenode log. (Arpit Agarwal via suresh) + + HDFS-4992. Make balancer's mover thread count and dispatcher thread count + configurable. (Max Lapan via szetszwo) + + HDFS-4996. ClientProtocol#metaSave can be made idempotent by overwriting the + output file instead of appending to it. (cnauroth) + + HADOOP-9418. Add symlink support to DistributedFileSystem (Andrew Wang via + Colin Patrick McCabe) + + HDFS-5007. Replace hard-coded property keys with DFSConfigKeys fields. + (Kousuke Saruta via jing9) + + HDFS-5008. Make ClientProtocol#abandonBlock() idempotent. (jing9) + + HADOOP-9760. 
Move GSet and related classes to common from HDFS. + (suresh) + + HDFS-5020. Make DatanodeProtocol#blockReceivedAndDeleted idempotent. + (jing9) + + HDFS-5024. Make DatanodeProtocol#commitBlockSynchronization idempotent. + (Arpit Agarwal via jing9) + + HDFS-3880. Use Builder to build RPC server in HDFS. + (Brandon Li and Junping Du via szetszwo) + + OPTIMIZATIONS + + HDFS-4465. Optimize datanode ReplicasMap and ReplicaInfo. (atm) + + HDFS-5027. On startup, DN should scan volumes in parallel. (atm) + + BUG FIXES + + HDFS-4626. ClientProtocol#getLinkTarget should throw an exception for + non-symlink and non-existent paths. (Andrew Wang via cmccabe) + + HDFS-3934. duplicative dfs_hosts entries handled wrong. (Colin Patrick + McCabe) + + HDFS-4470. Several HDFS tests attempt file operations on invalid HDFS + paths when running on Windows. (Chris Nauroth via suresh) + + HDFS-4471. Namenode WebUI file browsing does not work with wildcard + addresses configured. (Andrew Wang via atm) + + HDFS-4342. Directories configured in dfs.namenode.edits.dir.required + but not in dfs.namenode.edits.dir are silently ignored. (Arpit Agarwal + via szetszwo) + + HDFS-4482. ReplicationMonitor thread can exit with NPE due to the race + between delete and replication of same file. (umamahesh) + + HDFS-4269. Datanode rejects all datanode registrations from localhost + in single-node developer setup on Windows. (Chris Nauroth via suresh) + + HDFS-4235. When outputting XML, OfflineEditsViewer can't handle some edits + containing non-ASCII strings. (Colin Patrick McCabe via atm) + + HDFS-4541. Set hadoop.log.dir and hadoop.id.str when starting secure + datanode to write the logs to right dir by default. (Arpit Gupta via + suresh) + + HDFS-4540. Namenode http server should use the web authentication + keytab for spnego principal. (Arpit Gupta via suresh) + + HDFS-4544. Error in deleting blocks should not do check disk, for + all types of errors. (Arpit Agarwal via suresh) + + HDFS-4565. Use DFSUtil.getSpnegoKeytabKey() to get the spnego keytab key + in secondary namenode and namenode http server. (Arpit Gupta via suresh) + + HDFS-4571. WebHDFS should not set the service hostname on the server side. + (tucu) + + HDFS-4013. TestHftpURLTimeouts throws NPE. (Chao Shi via suresh) + + HDFS-4592. Default values for access time precision are out of sync between + hdfs-default.xml and the code. (atm) + + HDFS-4522. LightWeightGSet expects incrementing a volatile to be atomic. + (Colin Patrick McCabe via atm) + + HDFS-4484. libwebhdfs compilation broken with gcc 4.6.2. (Colin Patrick + McCabe via atm) + + HDFS-4595. When short circuit read is fails, DFSClient does not fallback + to regular reads. (suresh) + + HDFS-4583. TestNodeCount fails. (Ivan Mitic via suresh) + + HDFS-4591. HA clients can fail to fail over while Standby NN is performing + long checkpoint. (atm) + + HDFS-3277. fail over to loading a different FSImage if the first one we + try to load is corrupt. (Colin Patrick McCabe and Andrew Wang via atm) + + HDFS-4596. Shutting down namenode during checkpointing can lead to md5sum + error. (Andrew Wang via atm) + + HDFS-4614. FSNamesystem#getContentSummary should use getPermissionChecker + helper method. (atm) + + HDFS-4620. Documentation for dfs.namenode.rpc-address specifies wrong + format. (Sandy Ryza via atm) + + HDFS-4609. TestAuditLogs should release log handles between tests. + (Ivan Mitic via szetszwo) + + HDFS-4598. Fix the default value of ConcatSourcesParam and the WebHDFS doc. + (szetszwo) + + HDFS-4655. 
DNA_FINALIZE is logged as being an unknown command by the DN + when received from the standby NN. (atm) + + HDFS-4656. DN heartbeat loop can be briefly tight. (atm) + + HDFS-4658. Standby NN will log that it has received a block report "after + becoming active" (atm) + + HDFS-4646. createNNProxyWithClientProtocol ignores configured timeout + value (Jagane Sundar via cos) + + HDFS-3981. Fix handling of FSN lock in getBlockLocations. (Xiaobo Peng + and todd via todd) + + HDFS-4676. TestHDFSFileSystemContract should set MiniDFSCluster variable + to null to free up memory. (suresh) + + HDFS-4669. TestBlockPoolManager fails using IBM java. (Tian Hong Wang via + suresh) + + HDFS-4643. Fix flakiness in TestQuorumJournalManager. (todd) + + HDFS-4639. startFileInternal() should not increment generation stamp. + (Plamen Jeliazkov via shv) + + HDFS-4695. TestEditLog leaks open file handles between tests. + (Ivan Mitic via suresh) + + HDFS-4737. JVM path embedded in fuse binaries. (Sean Mackrory via atm) + + HDFS-4739. NN can miscalculate the number of extra edit log segments to + retain. (atm) + + HDFS-4745. TestDataTransferKeepalive#testSlowReader has race condition that + causes sporadic failure. (Chris Nauroth via suresh) + + HDFS-4768. File handle leak in datanode when a block pool is removed. + (Chris Nauroth via suresh) + + HDFS-4748. MiniJournalCluster#restartJournalNode leaks resources, which + causes sporadic test failures. (Chris Nauroth via suresh) + + HDFS-4733. Make HttpFS username pattern configurable. (tucu via atm) + + HDFS-4778. Fixes some issues that the first patch on HDFS-2576 missed. + (ddas) + + HDFS-4785. Concat operation does not remove concatenated files from + InodeMap. (suresh) + + HDFS-4784. NPE in FSDirectory.resolvePath(). (Brandon Li via suresh) + + HDFS-4810. several HDFS HA tests have timeouts that are too short. (Chris + Nauroth via atm) + + HDFS-4799. Corrupt replica can be prematurely removed from + corruptReplicas map. (todd via kihwal) + + HDFS-4751. TestLeaseRenewer#testThreadName flakes. (Andrew Wang via atm) + + HDFS-4533. start-dfs.sh ignores additional parameters besides -upgrade. + (Fengdong Yu via suresh) + + HDFS-4765. Permission check of symlink deletion incorrectly throws + UnresolvedLinkException. (Andrew Wang via atm) + + HDFS-4300. TransferFsImage.downloadEditsToStorage should use a tmp file for + destination. (Andrew Wang via atm) + + HDFS-4813. Add volatile to BlocksMap.blocks so that the replication thread + can see the updated value. (Jing Zhao via szetszwo) + + HDFS-3180. Add socket timeouts to WebHdfsFileSystem. (Chris Nauroth via + szetszwo) + + HDFS-4787. Create a new HdfsConfiguration before each TestDFSClientRetries + testcases. (Tian Hong Wang via atm) + + HDFS-4830. Typo in config settings for AvailableSpaceVolumeChoosingPolicy + in hdfs-default.xml. (atm) + + HDFS-4824. FileInputStreamCache.close leaves dangling reference to + FileInputStreamCache.cacheCleaner. (Colin Patrick McCabe via todd) + + HDFS-4298. StorageRetentionManager spews warnings when used with QJM. (atm) + + HDFS-4725. Fix HDFS file handle leaks in FSEditLog, NameNode, + OfflineEditsBinaryLoader and some tests. (Chris Nauroth via szetszwo) + + HDFS-4825. webhdfs / httpfs tests broken because of min block size change. + (Andrew Wang via suresh) + + HDFS-4780. Use the correct relogin method for services. (Robert Parker via + kihwal) + + HDFS-4827. Slight update to the implementation of API for handling favored + nodes in DFSClient (ddas) + + HDFS-4865. 
Remove sub resource warning from httpfs log at startup time. + (ywskycn via tucu) + + HDFS-4240. For nodegroup-aware block placement, when a node is excluded, + the nodes in the same nodegroup should also be excluded. (Junping Du + via szetszwo) + + HDFS-4261. Fix bugs in Balancer causing infinite loop and + TestBalancerWithNodeGroup timing out. (Junping Du via szetszwo) + + HDFS-4382. Fix typo MAX_NOT_CHANGED_INTERATIONS. (Ted Yu via suresh) + + HDFS-4840. ReplicationMonitor gets NPE during shutdown. (kihwal) + + HADOOP-8957 HDFS tests for AbstractFileSystem#IsValidName should be overridden for + embedded file systems like ViewFs (Chris Nauroth via Sanjay Radia) + + HDFS-4586. TestDataDirs.testGetDataDirsFromURIs fails with all directories + in dfs.datanode.data.dir are invalid. (Ivan Mitic via atm) + + HDFS-3792. Fix two findbugs introduced by HDFS-3695 (todd) + + HADOOP-9635 Fix potential Stack Overflow in DomainSocket.c (V. Karthik Kumar + via cmccabe) + + HDFS-3163. TestHDFSCLI.testAll fails if the user name is not all lowercase. + (Brandon Li via atm) + + HDFS-4845. FSNamesystem.deleteInternal should acquire write-lock before + changing the inode map. (Arpit Agarwal via szetszwo) + + HDFS-4910. TestPermission failed in branch-2. (Chuan Liu via cnauroth) + + HDFS-4906. HDFS Output streams should not accept writes after being + closed. (atm) + + HDFS-4917. Start-dfs.sh cannot pass the parameters correctly. + (Fengdong Yu via suresh) + + HDFS-4205. fsck fails with symlinks. (jlowe) + + HDFS-4927. CreateEditsLog creates inodes with an invalid inode ID, which then + cannot be loaded by a namenode. (cnauroth) + + HDFS-4944. WebHDFS cannot create a file path containing characters that must + be URI-encoded, such as space. (cnauroth) + + HDFS-4888. Refactor and fix FSNamesystem.getTurnOffTip. (Ravi Prakash via + kihwal) + + HDFS-4943. WebHdfsFileSystem does not work when original file path has + encoded chars. (Jerry He via szetszwo) + + HDFS-4948. mvn site for hadoop-hdfs-nfs fails. (brandonli) + + HDFS-4954. In nfs, OpenFileCtx.getFlushedOffset() should handle IOException. + (Brandon Li via szetszwo) + + HDFS-4887. TestNNThroughputBenchmark exits abruptly. (kihwal) + + HDFS-4980. Incorrect logging.properties file for hadoop-httpfs. + (Mark Grover via suresh) + + HDFS-4999. Fix TestShortCircuitLocalRead on branch-2. (cmccabe via kihwal) + + HDFS-4687. TestDelegationTokenForProxyUser#testWebHdfsDoAs is flaky with + JDK7. (Andrew Wang via atm) + + HDFS-5003. TestNNThroughputBenchmark failed caused by existing directories. + (Xi Fang via cnauroth) + + HDFS-5018. Misspelled DFSConfigKeys#DFS_NAMENODE_STALE_DATANODE_INTERVAL_DEFAULT + in javadoc of DatanodeInfo#isStale(). (Ted Yu via jing9) + + HDFS-4602. TestBookKeeperHACheckpoints fails. (umamahesh) + + HDFS-5016. Deadlock in pipeline recovery causes Datanode to be marked dead. + (suresh) + + HDFS-5228. The RemoteIterator returned by DistributedFileSystem.listFiles + may throw NullPointerException. (szetszwo and cnauroth via szetszwo) + + BREAKDOWN OF HDFS-347 SUBTASKS AND RELATED JIRAS + + HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes. + (Colin Patrick McCabe via todd) + + HDFS-4354. Create DomainSocket and DomainPeer and associated unit tests. + (Colin Patrick McCabe via todd) + + HDFS-4356. BlockReaderLocal should use passed file descriptors rather than paths. + (Colin Patrick McCabe via todd) + + HDFS-4388. DomainSocket should throw AsynchronousCloseException when appropriate. 
+ (Colin Patrick McCabe via todd) + + HDFS-4390. Bypass UNIX domain socket unit tests when they cannot be run. + (Colin Patrick McCabe via todd) + + HDFS-4400. DFSInputStream#getBlockReader: last retries should ignore the cache + (Colin Patrick McCabe via todd) + + HDFS-4401. Fix bug in DomainSocket path validation + (Colin Patrick McCabe via todd) + + HDFS-4402. Some small DomainSocket fixes: avoid findbugs warning, change + log level, etc. (Colin Patrick McCabe via todd) + + HDFS-4418. increase default FileInputStreamCache size (todd) + + HDFS-4416. Rename dfs.datanode.domain.socket.path to dfs.domain.socket.path + (Colin Patrick McCabe via todd) + + HDFS-4417. Fix case where local reads get disabled incorrectly + (Colin Patrick McCabe and todd via todd) + + HDFS-4433. Make TestPeerCache not flaky (Colin Patrick McCabe via todd) + + HDFS-4438. TestDomainSocket fails when system umask is set to 0002. (Colin + Patrick McCabe via atm) + + HDFS-4440. Avoid annoying log message when dfs.domain.socket.path is not + set. (Colin Patrick McCabe via atm) + + HDFS-4473. Don't create domain socket unless we need it. (Colin Patrick McCabe via atm) + + HDFS-4485. DN should chmod socket path a+w. (Colin Patrick McCabe via atm) + + HDFS-4453. Make a simple doc to describe the usage and design of the + shortcircuit read feature. (Colin Patrick McCabe via atm) + + HDFS-4496. DFSClient: don't create a domain socket unless we need it (Colin + Patrick McCabe via todd) + + HDFS-347: style cleanups (Colin Patrick McCabe via atm) + + HDFS-4538. Allow use of legacy blockreader (Colin Patrick McCabe via todd) + + HDFS-4661. A few little code cleanups of some HDFS-347-related code. (Colin + Patrick McCabe via atm) + + BREAKDOWN OF HADOOP-8562 and HDFS-3602 SUBTASKS AND RELATED JIRAS + + HDFS-4145. Merge hdfs cmd line scripts from branch-1-win. (David Lao, + Bikas Saha, Lauren Yang, Chuan Liu, Thejas M Nair and Ivan Mitic via suresh) + + HDFS-4163. HDFS distribution build fails on Windows. (Chris Nauroth via + suresh) + + HDFS-4316. branch-trunk-win contains test code accidentally added during + work on fixing tests on Windows. (Chris Nauroth via suresh) + + HDFS-4297. Fix issues related to datanode concurrent reading and writing on + Windows. (Arpit Agarwal, Chuan Liu via suresh) + + HDFS-4573. Fix TestINodeFile on Windows. (Arpit Agarwal via suresh) + + HDFS-4572. Fix TestJournal failures on Windows. (Arpit Agarwal via suresh) + + HDFS-4287. HTTPFS tests fail on Windows. (Chris Nauroth via suresh) + + HDFS-4593. TestSaveNamespace fails on Windows. (Arpit Agarwal via suresh) + + HDFS-4582. TestHostsFiles fails on Windows. (Ivan Mitic via suresh) + + HDFS-4603. TestMiniDFSCluster fails on Windows. (Ivan Mitic via suresh) + + HDFS-4604. TestJournalNode fails on Windows. (Ivan Mitic via suresh) + + HDFS-4607. In TestGetConf.testGetSpecificKey(), use a platform-specific + line separator; otherwise, it fails on Windows. (Ivan Mitic via szetszwo) + + HDFS-4625. Make TestNNWithQJM#testNewNamenodeTakesOverWriter work on + Windows. (Ivan Mitic via suresh) + + HDFS-4674. TestBPOfferService fails on Windows due to failure parsing + datanode data directory as URI. (Chris Nauroth via suresh) + + HDFS-4615. Fix TestDFSShell failures on Windows. (Arpit Agarwal + via szetszwo) + + HDFS-4584. Skip TestNNWithQJM.testNewNamenodeTakesOverWriter() on Windows. + (Arpit Agarwal via szetszwo) + + HDFS-4732. Fix TestDFSUpgradeFromImage which fails on Windows due to + failure to unpack old image tarball that contains hard links. 
+ (Chris Nauroth via szetszwo) + + HDFS-4741. TestStorageRestore#testStorageRestoreFailure fails on Windows. + (Arpit Agarwal via suresh) + + HDFS-4743. TestNNStorageRetentionManager fails on Windows. + (Chris Nauroth via suresh) + + HDFS-4740. Fixes for a few test failures on Windows. + (Arpit Agarwal via suresh) + + HDFS-4722. TestGetConf#testFederation times out on Windows. + (Ivan Mitic via suresh) + + HDFS-4705. Address HDFS test failures on Windows because of invalid + dfs.namenode.name.dir. (Ivan Mitic via suresh) + + HDFS-4734. HDFS Tests that use ShellCommandFencer are broken on Windows. + (Arpit Agarwal via suresh) + + HDFS-4610. Use common utils FileUtil#setReadable/Writable/Executable and + FileUtil#canRead/Write/Execute. (Ivan Mitic via suresh) + + HDFS-4677. Editlog should support synchronous writes. (ivanmi) + + HDFS-4752. TestRBWBlockInvalidation fails on Windows due to file locking. + (Chris Nauroth via suresh) + + HDFS-4783. TestDelegationTokensWithHA#testHAUtilClonesDelegationTokens fails + on Windows. (cnauroth) + + HDFS-4818. Several HDFS tests that attempt to make directories unusable do + not work correctly on Windows. (cnauroth) + + BREAKDOWN OF HDFS-2802 HDFS SNAPSHOT SUBTASKS AND RELATED JIRAS + + HDFS-4076. Support snapshot of single files. (szetszwo) + + HDFS-4082. Add editlog opcodes for snapshot create and delete operations. + (suresh via szetszwo) + + HDFS-4086. Add editlog opcodes to allow and disallow snapshots on a + directory. (Brandon Li via suresh) + + HDFS-4083. Protocol changes for snapshots. (suresh) + + HDFS-4077. Add support for Snapshottable Directory. (szetszwo via suresh) + + HDFS-4087. Protocol changes for listSnapshots functionality. + (Brandon Li via suresh) + + HDFS-4079. Add SnapshotManager which maintains a list for all the + snapshottable directories and supports snapshot methods such as setting a + directory to snapshottable and creating a snapshot. (szetszwo) + + HDFS-4078. Handle replication in snapshots. (szetszwo) + + HDFS-4084. Provide CLI support to allow and disallow snapshot + on a directory. (Brandon Li via suresh) + + HDFS-4091. Add snapshot quota to limit the number of snapshots allowed. + (szetszwo) + + HDFS-4097. Provide CLI support for createSnapshot. (Brandon Li via suresh) + + HDFS-4092. Update file deletion logic for snapshot so that the current inode + is removed from the circular linked list; and if some blocks at the end of + the block list no longer belong to any other inode, collect them and update + the block list. (szetszwo) + + HDFS-4111. Support snapshot of subtrees. (szetszwo via suresh) + + HDFS-4119. Complete the allowSnapshot code and add a test for it. (szetszwo) + + HDFS-4133. Add testcases for testing basic snapshot functionalities. + (Jing Zhao via suresh) + + HDFS-4116. Add auditlog for some snapshot operations. (Jing Zhao via suresh) + + HDFS-4095. Add some snapshot related metrics. (Jing Zhao via suresh) + + HDFS-4141. Support directory diff - the difference between the current state + and a previous snapshot of an INodeDirectory. (szetszwo) + + HDFS-4146. Use getter and setter in INodeFileWithLink to access blocks and + initialize root directory as snapshottable. (szetszwo) + + HDFS-4149. Implement the disallowSnapshot(..) in FSNamesystem and add + resetSnapshottable(..) to SnapshotManager. (szetszwo) + + HDFS-4147. When there is a snapshot in a subtree, deletion of the subtree + should fail. (Jing Zhao via szetszwo) + + HDFS-4150. 
Update the inode in the block map when a snapshotted file or a + snapshot file is deleted. (Jing Zhao via szetszwo) + + HDFS-4159. Rename should fail when the destination directory is + snapshottable and has snapshots. (Jing Zhao via szetszwo) + + HDFS-4170. Add snapshot information to INodesInPath. (szetszwo) + + HDFS-4177. Add a snapshot parameter to INodeDirectory.getChildrenList() for + selecting particular snapshot children list views. (szetszwo) + + HDFS-4148. Disallow write/modify operations on files and directories in a + snapshot. (Brandon Li via suresh) + + HDFS-4188. Add Snapshot.ID_COMPARATOR for comparing IDs and fix a bug in + ReadOnlyList.Util.binarySearch(..). (szetszwo) + + HDFS-4187. Add tests for replication handling in snapshots. (Jing Zhao via + szetszwo) + + HDFS-4196. Support renaming of snapshots. (Jing Zhao via szetszwo) + + HDFS-4175. Additional snapshot tests for more complicated directory + structure and modifications. (Jing Zhao via suresh) + + HDFS-4293. Fix TestSnapshot failure. (Jing Zhao via suresh) + + HDFS-4317. Change INode and its subclasses to support HDFS-4103. (szetszwo) + + HDFS-4103. Support O(1) snapshot creation. (szetszwo) + + HDFS-4330. Support snapshots up to the snapshot limit. (szetszwo) + + HDFS-4357. Fix a bug that if an inode is replaced, further INode operations + should apply to the new inode. (Jing Zhao via szetszwo) + + HDFS-4230. Support listing of all the snapshottable directories. (Jing Zhao + via szetszwo) + + HDFS-4244. Support snapshot deletion. (Jing Zhao via szetszwo) + + HDFS-4245. Include snapshot related operations in TestOfflineEditsViewer. + (Jing Zhao via szetszwo) + + HDFS-4395. In INodeDirectorySnapshottable's constructor, the passed-in dir + could be an INodeDirectoryWithSnapshot. (Jing Zhao via szetszwo) + + HDFS-4397. Fix a bug in INodeDirectoryWithSnapshot.Diff.combinePostDiff(..) + that it may put the wrong node into the deleted list. (szetszwo) + + HDFS-4407. Change INodeDirectoryWithSnapshot.Diff.combinePostDiff(..) to + merge-sort like and keep the postDiff parameter unmodified. (szetszwo) + + HDFS-4098. Add FileWithSnapshot, INodeFileUnderConstructionWithSnapshot and + INodeFileUnderConstructionSnapshot for supporting append to snapshotted + files. (szetszwo) + + HDFS-4126. Add reading/writing snapshot information to FSImage. + (Jing Zhao via suresh) + + HDFS-4436. Change INode.recordModification(..) to return only the current + inode and remove the updateCircularList parameter from some methods in + INodeDirectoryWithSnapshot.Diff. (szetszwo) + + HDFS-4429. When the latest snapshot exists, INodeFileUnderConstruction + should be replaced with INodeFileWithSnapshot but not INodeFile. + (Jing Zhao via szetszwo) + + HDFS-4441. Move INodeDirectoryWithSnapshot.Diff and the related classes to a + package. (szetszwo) + + HDFS-4432. Support INodeFileUnderConstructionWithSnapshot in FSImage + saving/loading. (Jing Zhao via suresh) + + HDFS-4131. Add capability to namenode to get snapshot diff. (Jing Zhao via + suresh) + + HDFS-4447. Refactor INodeDirectoryWithSnapshot for supporting general INode + diff lists. (szetszwo) + + HDFS-4189. Renames the getMutableXxx methods to getXxx4Write and fix a bug + that some getExistingPathINodes calls should be getINodesInPath4Write. + (szetszwo) + + HDFS-4361. When listing snapshottable directories, only return those + where the user has permission to take snapshots. (Jing Zhao via szetszwo) + + HDFS-4464. 
Combine collectSubtreeBlocksAndClear with deleteDiffsForSnapshot + and rename it to destroySubtreeAndCollectBlocks. (szetszwo) + + HDFS-4414. Add support for getting snapshot diff from DistributedFileSystem. + (Jing Zhao via suresh) + + HDFS-4446. Support file snapshots with diff lists. (szetszwo) + + HDFS-4480. Eliminate the file snapshot circular linked list. (szetszwo) + + HDFS-4481. Change fsimage to support snapshot file diffs. (szetszwo) + + HDFS-4500. Refactor snapshot INode methods. (szetszwo) + + HDFS-4487. Fix snapshot diff report for HDFS-4446. (Jing Zhao via szetszwo) + + HDFS-4431. Support snapshot in OfflineImageViewer. (Jing Zhao via szetszwo) + + HDFS-4503. Update computeContentSummary(..), spaceConsumedInTree(..) and + diskspaceConsumed(..) in INode for snapshot. (szetszwo) + + HDFS-4499. Fix file/directory/snapshot deletion for file diff. (Jing Zhao + via szetszwo) + + HDFS-4524. Update SnapshotManager#snapshottables when loading fsimage. + (Jing Zhao via szetszwo) + + HDFS-4520. Support listing snapshots under a snapshottable directory using + ls. (Jing Zhao via szetszwo) + + HDFS-4514. Add CLI for supporting snapshot rename, diff report, and + snapshottable directory listing. (Jing Zhao via szetszwo) + + HDFS-4523. Fix INodeFile replacement, TestQuota and javac errors from trunk + merge. (szetszwo) + + HDFS-4507. Update quota verification for snapshots. (szetszwo) + + HDFS-4545. With snapshots, FSDirectory.unprotectedSetReplication(..) always + changes file replication but it may or may not changes block replication. + (szetszwo) + + HDFS-4557. Fix FSDirectory#delete when INode#cleanSubtree returns 0. + (Jing Zhao via szetszwo) + + HDFS-4579. Annotate snapshot tests. (Arpit Agarwal via suresh) + + HDFS-4574. Move Diff to the util package. (szetszwo) + + HDFS-4563. Update namespace/diskspace usage after deleting snapshots. + (Jing Zhao via szetszwo) + + HDFS-4144. Create test for all snapshot-related metrics. + (Jing Zhao via suresh) + + HDFS-4556. Add snapshotdiff and LsSnapshottableDir tools to hdfs script. + (Arpit Agarwal via szetszwo) + + HDFS-4534. Add INodeReference in order to support rename with snapshots. + (szetszwo) + + HDFS-4616. Update the FilesDeleted metric while deleting file/dir in the + current tree. (Jing Zhao via szetszwo) + + HDFS-4627. Fix FSImageFormat#Loader NPE and synchronization issues. + (Jing Zhao via suresh) + + HDFS-4612. Not to use INode.getParent() when generating snapshot diff + report. (Jing Zhao via szetszwo) + + HDFS-4636. Update quota usage when deleting files/dirs that were created + after taking the latest snapshot. (Jing Zhao via szetszwo) + + HDFS-4648. For snapshot deletion, when merging the diff from to-delete + snapshot to the prior snapshot, make sure files/directories created after + the prior snapshot get deleted. (Jing Zhao via szetszwo) + + HDFS-4637. INodeDirectory#replaceSelf4Quota may incorrectly convert a newly + created directory to an INodeDirectoryWithSnapshot. (Jing Zhao via szetszwo) + + HDFS-4611. Update FSImage for INodeReference. (szetszwo) + + HDFS-4647. Rename should call setLocalName after an inode is removed from + snapshots. (Arpit Agarwal via szetszwo) + + HDFS-4684. Use INode id for image serialization when writing INodeReference. + (szetszwo) + + HDFS-4675. Fix rename across snapshottable directories. (Jing Zhao via + szetszwo) + + HDFS-4692. Use timestamp as default snapshot names. (szetszwo) + + HDFS-4666. 
Define ".snapshot" as a reserved inode name so that users cannot + create a file/directory with ".snapshot" as the name. If ".snapshot" is + used in a previous version of HDFS, it must be renamed before upgrade; + otherwise, upgrade will fail. (szetszwo) + + HDFS-4700. Fix the undo section of rename with snapshots. (Jing Zhao via + szetszwo) + + HDFS-4529. Disallow concat when one of the src files is in some snapshot. + (szetszwo) + + HDFS-4550. Refactor INodeDirectory.INodesInPath to a standalone class. + (szetszwo) + + HDFS-4707. Add snapshot methods to FilterFileSystem and fix findbugs + warnings. (szetszwo) + + HDFS-4706. Do not replace root inode for disallowSnapshot. (szetszwo) + + HDFS-4717. Change the path parameter type of the snapshot methods in + HdfsAdmin from String to Path. (szetszwo) + + HDFS-4708. Add snapshot user documentation. (szetszwo) + + HDFS-4726. Fix test failures after merging the INodeId-INode mapping + from trunk. (Jing Zhao via szetszwo) + + HDFS-4727. Update inodeMap after deleting files/directories/snapshots. + (Jing Zhao via szetszwo) + + HDFS-4719. Remove AbstractINodeDiff.Factory and move its methods to + AbstractINodeDiffList. (Arpit Agarwal via szetszwo) + + HDFS-4735. DisallowSnapshot throws IllegalStateException for nested + snapshottable directories. (Jing Zhao via szetszwo) + + HDFS-4738. Changes AbstractINodeDiff to implement Comparable, and + fix javadoc and other warnings. (szetszwo) + + HDFS-4686. Update quota computation for rename and INodeReference. + (Jing Zhao via szetszwo) + + HDFS-4729. Fix OfflineImageViewer and permission checking for snapshot + operations. (Jing Zhao via szetszwo) + + HDFS-4749. Use INodeId to identify the corresponding directory node in + FSImage saving/loading. (Jing Zhao via szetszwo) + + HDFS-4742. Fix appending to a renamed file with snapshot. (Jing Zhao via + szetszwo) + + HDFS-4755. Fix AccessControlException message and moves "implements + LinkedElement" from INode to INodeWithAdditionalFields. (szetszwo) + + HDFS-4650. Fix a bug in FSDirectory and add more unit tests for rename with + existence of snapshottable directories and snapshots. (Jing Zhao via + szetszwo) + + HDFS-4650. When passing two non-existing snapshot names to snapshotDiff, it + returns success if the names are the same. (Jing Zhao via szetszwo) + + HDFS-4767. If a directory is snapshottable, do not replace it when clearing + quota. (Jing Zhao via szetszwo) + + HDFS-4578. Restrict snapshot IDs to 24-bit wide. (Arpit Agarwal via + szetszwo) + + HDFS-4773. Fix bugs in quota usage computation and OfflineImageViewer. + (Jing Zhao via szetszwo) + + HDFS-4760. Update inodeMap after node replacement. (Jing Zhao via szetszwo) + + HDFS-4758. Disallow nested snapshottable directories and unwrap + RemoteException. (szetszwo) + + HDFS-4781. Fix a NullPointerException when listing .snapshot under + a non-existing directory. (szetszwo) + + HDFS-4791. Update and fix deletion of reference inode. (Jing Zhao via + szetszwo) + + HDFS-4798. Update computeContentSummary() for the reference nodes in + snapshots. (szetszwo) + + HDFS-4800. Fix INodeDirectoryWithSnapshot#cleanDeletedINode. (Jing Zhao via + szetszwo) + + HDFS-4801. lsSnapshottableDir throws IllegalArgumentException when root is + snapshottable. (Jing Zhao via szetszwo) + + HDFS-4802. Disallowing snapshot on / twice should throw SnapshotException + but not IllegalStateException. (Jing Zhao via szetszwo) + + HDFS-4806. 
In INodeDirectoryWithSnapshot, use isInLatestSnapshot() to + determine if an added/removed child should be recorded in the snapshot diff. + (Jing Zhao via szetszwo) + + HDFS-4809. When a QuotaExceededException is thrown during rename, the quota + usage should be subtracted back. (Jing Zhao via szetszwo) + + HDFS-4842. Identify the correct prior snapshot when deleting a + snapshot under a renamed subtree. (jing9) + + HDFS-4846. Clean up snapshot CLI commands output stacktrace for invalid + arguments. (Jing Zhao via brandonli) + + HDFS-4857. Snapshot.Root and AbstractINodeDiff#snapshotINode should not be + put into INodeMap when loading FSImage. (jing9) + + HDFS-4863. The root directory should be added to the snapshottable + directory list while loading fsimage. (jing9) + + HDFS-4848. copyFromLocal and renaming a file to ".snapshot" should output + that ".snapshot" is a reserved name. (Jing Zhao via brandonli) + + HDFS-4826. TestNestedSnapshots times out due to repeated slow edit log + flushes when running on virtualized disk. (Chris Nauroth via szetszwo) + + HDFS-4876. Fix the javadoc of FileWithSnapshot and move FileDiffList to + FileWithSnapshot. (szetszwo) + + HDFS-4850. Fix OfflineImageViewer to work on fsimages with empty files or + snapshots. (jing9) + + HDFS-4877. Snapshot: fix the scenario where a directory is renamed under + its prior descendant. (jing9) + + HDFS-4873. callGetBlockLocations returns incorrect number of blocks for + snapshotted files. (jing9) + + HDFS-4819. Update Snapshot doc to clarify that nested snapshots are not + allowed. (szetszwo) + + HDFS-4902. DFSClient.getSnapshotDiffReport should use string path rather + than o.a.h.fs.Path. (Binglin Chang via jing9) + + HDFS-4875. Add a test for testing snapshot file length. + (Arpit Agarwal via jing9) + + HDFS-4841. FsShell commands using secure webhfds fail ClientFinalizer + shutdown hook. (rkanter via tucu) + + HDFS-4951. FsShell commands using secure httpfs throw exceptions due + to missing TokenRenewer. (rknater via tucu) + + HDFS-4969. WebhdfsFileSystem expects non-standard WEBHDFS Json element. + (rkanter via tucu) + + HDFS-4797. BlockScanInfo does not override equals(..) and hashCode() + consistently. (szetszwo) + + HDFS-4978. Make disallowSnapshot idempotent. (jing9) + + HDFS-5005. Move SnapshotException and SnapshotAccessControlException + to o.a.h.hdfs.protocol. (jing9) + + HDFS-4982. JournalNode should relogin from keytab before fetching logs + from other JNs (todd) + +Release 2.0.5-alpha - 06/06/2013 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 2.0.4-alpha - 2013-04-25 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 2.0.3-alpha - 2013-02-06 + + INCOMPATIBLE CHANGES + + HDFS-4122. Cleanup HDFS logs and reduce the size of logged messages. + (suresh) + + HDFS-4362. GetDelegationTokenResponseProto does not handle null token. + (suresh) + + HDFS-4367. GetDataEncryptionKeyResponseProto does not handle null + response. (suresh) + + HDFS-4364. GetLinkTargetResponseProto does not handle null path. (suresh) + + HDFS-4369. GetBlockKeysResponseProto does not handle null response. + (suresh) + + HDFS-4451. hdfs balancer command returns exit code 1 on success instead + of 0. (Joshua Blatt via suresh) + + HDFS-4350. Make enabling of stale marking on read and write paths + independent. (Andrew Wang via suresh) + + + NEW FEATURES + + HDFS-2656. Add libwebhdfs, a pure C client based on WebHDFS. 
+ (Jaimin D Jetly and Jing Zhao via szetszwo) + + HDFS-3912. Detect and avoid stale datanodes for writes. + (Jing Zhao via suresh) + + HDFS-4059. Add number of stale DataNodes to metrics. (Jing Zhao via suresh) + + HDFS-4155. libhdfs implementation of hsync API (Liang Xie via todd) + + HDFS-4213. Add an API to hsync for updating the last block length at the + namenode. (Jing Zhao via szetszwo) + + HDFS-3077. Implement QuorumJournalManager, a distributed mechanism for + reliably storing HDFS edit logs. See dedicated section below for breakdown + of subtasks. + + IMPROVEMENTS + + HDFS-3925. Prettify PipelineAck#toString() for printing to a log + (Andrew Wang via todd) + + HDFS-3939. NN RPC address cleanup. (eli) + + HDFS-3373. Change DFSClient input stream socket cache to global static and + add a thread to cleanup expired cache entries. (John George via szetszwo) + + HDFS-3896. Add descriptions for dfs.namenode.rpc-address and + dfs.namenode.servicerpc-address to hdfs-default.xml. (Jeff Lord via atm) + + HDFS-3996. Add debug log removed in HDFS-3873 back. (eli) + + HDFS-3916. libwebhdfs (C client) code cleanups. + (Colin Patrick McCabe via eli) + + HDFS-3813. Log error message if security and WebHDFS are enabled but + principal/keytab are not configured. (Stephen Chu via atm) + + HDFS-3483. Better error message when hdfs fsck is run against a ViewFS + config. (Stephen Fritz via atm) + + HDFS-3682. MiniDFSCluster#init should provide more info when it fails. + (todd via eli) + + HDFS-4008. TestBalancerWithEncryptedTransfer needs a timeout. (eli) + + HDFS-4007. Rehabilitate bit-rotted unit tests under + hadoop-hdfs-project/hadoop-hdfs/src/test/unit/ + (Colin Patrick McCabe via todd) + + HDFS-4041. Hadoop HDFS Maven protoc calls must not depend on external + sh script. (Chris Nauroth via suresh) + + HADOOP-8911. CRLF characters in source and text files. + (Raja Aluri via suresh) + + HDFS-4037. Rename the getReplication() method in BlockCollection to + getBlockReplication(). (szetszwo) + + HDFS-4036. Remove "throws UnresolvedLinkException" from + FSDirectory.unprotectedAddFile(..). (Jing Zhao via szetszwo) + + HDFS-2946. HA: Put a cap on the number of completed edits files retained + by the NN. (atm) + + HDFS-4029. GenerationStamp should use an AtomicLong. (eli) + + HDFS-4068. DatanodeID and DatanodeInfo member should be private. (eli) + + HDFS-4073. Two minor improvements to FSDirectory. (Jing Zhao via szetszwo) + + HDFS-4074. Remove the unused default constructor from INode. (Brandon Li + via szetszwo) + + HDFS-4088. Remove "throws QuotaExceededException" from an + INodeDirectoryWithQuota constructor. (szetszwo) + + HDFS-4099. Clean up replication code and add more javadoc. (szetszwo) + + HDFS-4107. Add utility methods for casting INode to INodeFile and + INodeFileUnderConstruction. (szetszwo) + + HDFS-4112. A few improvements on INodeDirectory include adding a utility + method for casting; avoiding creation of new empty lists; cleaning up + some code and rewriting some javadoc. (szetszwo) + + HDFS-4121. Add namespace declarations in hdfs .proto files for languages + other than java. (Binglin Chang via suresh) + + HDFS-3573. Supply NamespaceInfo when instantiating JournalManagers. + (todd and ivank via umamahesh) + + HDFS-3695. Genericize format() to non-file JournalManagers. + (todd via umamahesh) + + HDFS-3789. JournalManager#format() should be able to throw IOException. + (Ivan Kelly via umamahesh) + + HDFS-3809. Make BKJM use protobufs for all serialization with ZK. 
+ (Ivan Kelly via umamhesh) + + HDFS-3916. libwebhdfs testing code cleanup. (Jing Zhao via suresh) + + HDFS-4143. Change blocks to private in INodeFile and renames isLink() to + isSymlink() in INode. (szetszwo) + + HDFS-4046. Rename ChecksumTypeProto enum NULL since it is illegal in + C/C++. (Binglin Chang via suresh) + + HDFS-4048. Use ERROR instead of INFO for volume failure logs. + (Stephen Chu via eli) + + HDFS-1322. Document umask in DistributedFileSystem#mkdirs javadocs. + (Colin Patrick McCabe via eli) + + HDFS-4038. Override toString() for BookKeeperEditLogInputStream. + (Vinay via umamahesh) + + HDFS-4214. OfflineEditsViewer should print out the offset at which it + encountered an error. (Colin Patrick McCabe via atm) + + HDFS-4199. Provide test for HdfsVolumeId. (Ivan A. Veselovsky via atm) + + HDFS-3049. During the normal NN startup process, fall back on a different + edit log if we see one that is corrupt (Colin Patrick McCabe via todd) + + HDFS-3571. Allow EditLogFileInputStream to read from a remote URL (todd) + + HDFS-4110. Refine a log printed in JNStorage. (Liang Xie via suresh) + + HDFS-4153. Add START_MSG/SHUTDOWN_MSG for JournalNode. (liang xie via atm) + + HDFS-3935. Add JournalNode to the start/stop scripts (Andy Isaacson via todd) + + HDFS-4268. Remove redundant enum NNHAStatusHeartbeat.State. (shv) + + HDFS-3680. Allow customized audit logging in HDFS FSNamesystem. (Marcelo + Vanzin via atm) + + HDFS-4130. BKJM: The reading for editlog at NN starting using bkjm is not efficient. + (Han Xiao via umamahesh) + + HDFS-4326. bump up Tomcat version for HttpFS to 6.0.36. (tucu via acmurthy) + + HDFS-4270. Introduce soft and hard limits for max replication so that + replications of the highest priority are allowed to choose a source datanode + that has reached its soft limit but not the hard limit. (Derek Dagit via + szetszwo) + + HADOOP-9173. Add security token protobuf definition to common and + use it in hdfs. (suresh) + + HDFS-4030. BlockManager excessBlocksCount and + postponedMisreplicatedBlocksCount should be AtomicLongs. (eli) + + HDFS-4031. Update findbugsExcludeFile.xml to include findbugs 2 + exclusions. (eli) + + HDFS-4033. Miscellaneous findbugs 2 fixes. (eli) + + HDFS-4034. Remove redundant null checks. (eli) + + HDFS-4035. LightWeightGSet and LightWeightHashSet increment a + volatile without synchronization. (eli) + + HDFS-4032. Specify the charset explicitly rather than rely on the + default. (eli) + + HDFS-4363. Combine PBHelper and HdfsProtoUtil and remove redundant + methods. (suresh) + + HDFS-4377. Some trivial DN comment cleanup. (eli) + + HDFS-4381. Document fsimage format details in FSImageFormat class javadoc. + (Jing Zhao via suresh) + + HDFS-4375. Use token request messages defined in hadoop common. + (suresh) + + HDFS-4392. Use NetUtils#getFreeSocketPort in MiniDFSCluster. + (Andrew Purtell via suresh) + + HDFS-4393. Make empty request and responses in protocol translators can be + static final members. (Brandon Li via suresh) + + HDFS-4403. DFSClient can infer checksum type when not provided by reading + first byte (todd) + + HDFS-4259. Improve pipeline DN replacement failure message (harsh) + + HDFS-3598. WebHDFS support for file concat. (Plamen Jeliazkov via shv) + + HDFS-4456. Add concat to HttpFS and WebHDFS REST API docs. (plamenj2003 via tucu) + + HDFS-3131. Improve TestStorageRestore. (Brandon Li via atm) + + OPTIMIZATIONS + + HDFS-3429. DataNode reads checksums even if client does not need them (todd) + + BUG FIXES + + HDFS-3919. 
MiniDFSCluster:waitClusterUp can hang forever. + (Andy Isaacson via eli) + + HDFS-3924. Multi-byte id in HdfsVolumeId. (Andrew Wang via atm) + + HDFS-3936. MiniDFSCluster shutdown races with BlocksMap usage. (eli) + + HDFS-3951. datanode web ui does not work over HTTPS when datanode is started in secure mode. (tucu) + + HDFS-3949. NameNodeRpcServer#join should join on both client and + server RPC servers. (eli) + + HDFS-3932. NameNode Web UI broken if the rpc-address is set to the wildcard. + (Colin Patrick McCabe via eli) + + HDFS-3931. TestDatanodeBlockScanner#testBlockCorruptionPolicy2 is broken. + (Andy Isaacson via eli) + + HDFS-3964. Make NN log of fs.defaultFS debug rather than info. (eli) + + HDFS-3992. Method org.apache.hadoop.hdfs.TestHftpFileSystem.tearDown() + sometimes throws NPEs. (Ivan A. Veselovsky via atm) + + HDFS-3753. Tests don't run with native libraries. + (Colin Patrick McCabe via eli) + + HDFS-4000. TestParallelLocalRead fails with "input ByteBuffers + must be direct buffers". (Colin Patrick McCabe via eli) + + HDFS-3999. HttpFS OPEN operation expects len parameter, it should be length. (tucu) + + HDFS-4006. TestCheckpoint#testSecondaryHasVeryOutOfDateImage + occasionally fails due to unexpected exit. (todd via eli) + + HDFS-4018. testMiniDFSClusterWithMultipleNN is missing some + cluster cleanup. (eli) + + HDFS-4020. TestRBWBlockInvalidation may time out. (eli) + + HDFS-4021. Misleading error message when resources are low on the NameNode. + (Christopher Conner via atm) + + HDFS-4044. Duplicate ChecksumType definition in HDFS .proto files. + (Binglin Chang via suresh) + + HDFS-4049. Fix hflush performance regression due to nagling delays + (todd) + + HDFS-3678. Edit log files are never being purged from 2NN. (atm) + + HDFS-4058. DirectoryScanner may fail with IOOB if the directory + scanning threads return out of volume order. (eli) + + HDFS-3985. Add timeouts to TestMulitipleNNDataBlockScanner. (todd via eli) + + HDFS-4061. TestBalancer and TestUnderReplicatedBlocks need timeouts. (eli) + + HDFS-3997. OfflineImageViewer incorrectly passes value of imageVersion when + visiting IS_COMPRESSED element. (Mithun Radhakrishnan via atm) + + HDFS-4055. TestAuditLogs is flaky. (Binglin Chang via eli) + + HDFS-4072. On file deletion remove corresponding blocks pending + replications. (Jing Zhao via suresh) + + HDFS-4022. Replication not happening for appended block. + (Vinay via umamahesh) + + HDFS-3948. Do not use hflush in TestWebHDFS.testNamenodeRestart() since the + out stream returned by WebHdfsFileSystem does not support it. (Jing Zhao + via szetszwo) + + HDFS-3616. Fix a ConcurrentModificationException bug that BP actor threads + may not be shutdown properly in DataNode. (Jing Zhao via szetszwo) + + HDFS-4127. Log message is not correct in case of short of replica. + (Junping Du via suresh) + + HADOOP-8994. TestDFSShell creates file named "noFileHere", making further + tests hard to understand (Andy Isaacson via daryn) + + HDFS-3804. TestHftpFileSystem fails intermittently with JDK7 + (Trevor Robinson via daryn) + + HDFS-4132. When libwebhdfs is not enabled, nativeMiniDfsClient frees + uninitialized memory (Colin Patrick McCabe via todd) + + HDFS-1331. dfs -test should work like /bin/test (Andy Isaacson via daryn) + + HDFS-3979. For hsync, datanode should wait for the local sync to complete + before sending ack. (Lars Hofhansl via szetszwo) + + HDFS-3810. Implement format() for BKJM (Ivan Kelly via umamahesh) + + HDFS-3625. 
Fix TestBackupNode by properly initializing edit log during + startup. (Junping Du via todd) + + HDFS-4138. BackupNode startup fails due to uninitialized edit log. + (Kihwal Lee via shv) + + HDFS-4162. Some malformed and unquoted HTML strings are returned from + datanode web ui. (Darek Dagit via suresh) + + HDFS-4164. fuse_dfs: add -lrt to the compiler command line on Linux. + (Colin Patrick McCabe via eli) + + HDFS-3921. NN will prematurely consider blocks missing when entering active + state while still in safe mode. (atm) + + HDFS-4106. BPServiceActor#lastHeartbeat, lastBlockReport and + lastDeletedReport should be volatile. (Jing Zhao via suresh) + + HDFS-4139. fuse-dfs RO mode still allows file truncation. + (Colin Patrick McCabe via eli) + + HDFS-4104. dfs -test -d prints inappropriate error on nonexistent directory + (Andy Isaacson via daryn) + + HDFS-3623. BKJM: zkLatchWaitTimeout hard coded to 6000. Make use of ZKSessionTimeout instead. + (umamahesh) + + HDFS-4100. Fix all findbug security warnings. (Liang Xie via eli) + + HDFS-3507. DFS#isInSafeMode needs to execute only on Active NameNode. + (Vinay via atm) + + HDFS-4105. The SPNEGO user for secondary namenode should use the web + keytab. (Arpit Gupta via jitendra) + + HDFS-4156. Seeking to a negative position should throw an IOE. + (Eli Reisman via eli) + + HDFS-4171. WebHDFS and HttpFs should accept only valid Unix user + names. (tucu) + + HDFS-4178. Shell scripts should not close stderr (Andy Isaacson via daryn) + + HDFS-4179. BackupNode: allow reads, fix checkpointing, safeMode. (shv) + + HDFS-4216. Do not ignore QuotaExceededException when adding symlinks. + (szetszwo) + + HDFS-4242. Map.Entry is incorrectly used in LeaseManager since the behavior + of it is undefined after the iteration or modifications of the map. + (szetszwo) + + HDFS-4231. BackupNode: Introduce BackupState. (shv) + + HDFS-4238. Standby namenode should not do purging of shared + storage edits. (todd) + + HDFS-4282. TestEditLog.testFuzzSequences FAILED in all pre-commit test + (todd) + + HDFS-4236. Remove artificial limit on username length introduced in + HDFS-4171. (tucu via suresh) + + HDFS-4279. NameNode does not initialize generic conf keys when started + with -recover. (Colin Patrick McCabe via atm) + + HDFS-4291. edit log unit tests leave stray test_edit_log_file around + (Colin Patrick McCabe via todd) + + HDFS-4292. Sanity check not correct in RemoteBlockReader2.newBlockReader + (Binglin Chang via todd) + + HDFS-4295. Using port 1023 should be valid when starting Secure DataNode + (Stephen Chu via todd) + + HDFS-4294. Backwards compatibility is not maintained for TestVolumeId. + (Ivan A. Veselovsky and Robert Parker via atm) + + HDFS-2264. NamenodeProtocol has the wrong value for clientPrincipal in + KerberosInfo annotation. (atm) + + HDFS-4307. SocketCache should use monotonic time. (Colin Patrick McCabe + via atm) + + HDFS-4315. DNs with multiple BPs can have BPOfferServices fail to start + due to unsynchronized map access. (atm) + + HDFS-4140. fuse-dfs handles open(O_TRUNC) poorly. (Colin Patrick McCabe + via atm) + + HDFS-4308. addBlock() should persist file blocks once. + (Plamen Jeliazkov via shv) + + HDFS-4347. Avoid infinite waiting checkpoint to complete in TestBackupNode. + (Plamen Jeliazkov via shv) + + HDFS-4349. Add test for reading files from BackupNode. (shv) + + HDFS-4302. Fix fatal exception when starting NameNode with DEBUG logs + (Eugene Koontz via todd) + + HDFS-3970. 
Fix bug causing rollback of HDFS upgrade to result in bad + VERSION file. (Vinay and Andrew Wang via atm) + + HDFS-4306. PBHelper.convertLocatedBlock miss convert BlockToken. (Binglin + Chang via atm) + + HDFS-4384. test_libhdfs_threaded gets SEGV if JNIEnv cannot be + initialized. (Colin Patrick McCabe via eli) + + HDFS-4328. TestLargeBlock#testLargeBlockSize is timing out. (Chris Nauroth + via atm) + + HDFS-4274. BlockPoolSliceScanner does not close verification log during + shutdown. (Chris Nauroth via suresh) + + HDFS-1245. Pluggable block id generation. (shv) + + HDFS-4415. HostnameFilter should handle hostname resolution failures and + continue processing. (Robert Kanter via atm) + + HDFS-4359. Slow RPC responses from NN can prevent metrics collection on + DNs. (liang xie via atm) + + HDFS-4444. Add space between total transaction time and number of + transactions in FSEditLog#printStatistics. (Stephen Chu via suresh) + + HDFS-4428. FsDatasetImpl should disclose what the error is when a rename + fails. (Colin Patrick McCabe via atm) + + HDFS-4452. getAdditionalBlock() can create multiple blocks if the client + times out and retries. (shv) + + HDFS-4445. All BKJM ledgers are not checked while tailing, So failover will fail. + (Vinay via umamahesh) + + HDFS-4462. 2NN will fail to checkpoint after an HDFS upgrade from a + pre-federation version of HDFS. (atm) + + HDFS-4404. Create file failure when the machine of first attempted NameNode + is down. (Todd Lipcon via atm) + + HDFS-4344. dfshealth.jsp throws NumberFormatException when + dfs.hosts/dfs.hosts.exclude includes port number. (Andy Isaacson via atm) + + HDFS-4468. Use the new StringUtils methods added by HADOOP-9252 and fix + TestHDFSCLI and TestQuota. (szetszwo) + + HDFS-4458. In DFSUtil.getNameServiceUris(..), convert default fs URI using + NetUtils.createSocketAddr(..) for being consistent with other addresses. + (Binglin Chang via szetszwo) + + BREAKDOWN OF HDFS-3077 SUBTASKS + + HDFS-3077. Quorum-based protocol for reading and writing edit logs. + (todd, Brandon Li, and Hari Mankude via todd) + + HDFS-3694. Fix getEditLogManifest to fetch httpPort if necessary (todd) + + HDFS-3692. Support purgeEditLogs() call to remotely purge logs on JNs + (todd) + + HDFS-3693. JNStorage should read its storage info even before a writer + becomes active (todd) + + HDFS-3725. Fix QJM startup when individual JNs have gaps (todd) + + HDFS-3741. Exhaustive failure injection test for skipped RPCs (todd) + + HDFS-3773. TestNNWithQJM fails after HDFS-3741. (atm) + + HDFS-3793. Implement genericized format() in QJM (todd) + + HDFS-3795. QJM: validate journal dir at startup (todd) + + HDFS-3798. Avoid throwing NPE when finalizeSegment() is called on invalid + segment (todd) + + HDFS-3799. QJM: handle empty log segments during recovery (todd) + + HDFS-3797. QJM: add segment txid as a parameter to journal() RPC (todd) + + HDFS-3800. improvements to QJM fault testing (todd) + + HDFS-3823. QJM: TestQJMWithFaults fails occasionally because of missed + setting of HTTP port. (todd and atm) + + HDFS-3826. QJM: Some trivial logging / exception text improvements. (todd + and atm) + + HDFS-3839. QJM: hadoop-daemon.sh should be updated to accept "journalnode" + (eli) + + HDFS-3845. Fixes for edge cases in QJM recovery protocol (todd) + + HDFS-3877. QJM: Provide defaults for dfs.journalnode.*address (eli) + + HDFS-3863. Track last "committed" txid in QJM (todd) + + HDFS-3869. Expose non-file journal manager details in web UI (todd) + + HDFS-3884. 
Journal format() should reset cached values (todd) + + HDFS-3870. Add metrics to JournalNode (todd) + + HDFS-3891. Make selectInputStreams throw IOE instead of RTE (todd) + + HDFS-3726. If a logger misses an RPC, don't retry that logger until next + segment (todd) + + HDFS-3893. QJM: Make QJM work with security enabled. (atm) + + HDFS-3897. QJM: TestBlockToken fails after HDFS-3893. (atm) + + HDFS-3898. QJM: enable TCP_NODELAY for IPC (todd) + + HDFS-3885. QJM: optimize log sync when JN is lagging behind (todd) + + HDFS-3900. QJM: avoid validating log segments on log rolls (todd) + + HDFS-3901. QJM: send 'heartbeat' messages to JNs even when they are + out-of-sync (todd) + + HDFS-3899. QJM: Add client-side metrics (todd) + + HDFS-3914. QJM: acceptRecovery should abort current segment (todd) + + HDFS-3915. QJM: Failover fails with auth error in secure cluster (todd) + + HDFS-3906. QJM: quorum timeout on failover with large log segment (todd) + + HDFS-3840. JournalNodes log JournalNotFormattedException backtrace error + before being formatted (todd) + + HDFS-3894. QJM: testRecoverAfterDoubleFailures can be flaky due to IPC + client caching (todd) + + HDFS-3926. QJM: Add user documentation for QJM. (atm) + + HDFS-3943. QJM: remove currently-unused md5sum field (todd) + + HDFS-3950. QJM: misc TODO cleanup, improved log messages, etc. (todd) + + HDFS-3955. QJM: Make acceptRecovery() atomic. (todd) + + HDFS-3956. QJM: purge temporary files when no longer within retention + period (todd) + + HDFS-4004. TestJournalNode#testJournal fails because of test case execution + order (Chao Shi via todd) + + HDFS-4017. Unclosed FileInputStream in GetJournalEditServlet + (Chao Shi via todd) + + HDFS-4351. In BlockPlacementPolicyDefault.chooseTarget(..), numOfReplicas + needs to be updated when avoiding stale nodes. (Andrew Wang via szetszwo) + + HDFS-2908. Add apache license header for StorageReport.java. (Brandon Li + via tgraves) + + HDFS-4399. Fix RAT warnings by excluding images sub-dir in docs. (Thomas + Graves via acmurthy) + +Release 2.0.2-alpha - 2012-09-07 + + INCOMPATIBLE CHANGES + + HDFS-3446. HostsFileReader silently ignores bad includes/excludes + (Matthew Jacobs via todd) + + HDFS-3755. Creating an already-open-for-write file with overwrite=true fails + (todd) + + NEW FEATURES + + HDFS-744. Support hsync in HDFS. (Lars Hofhansl via szetszwo) + + HDFS-3042. Automatic failover support for NameNode HA (todd) + (see dedicated section below for breakdown of subtasks) + + HDFS-3518. Add a utility method HdfsUtils.isHealthy(uri) for checking if + the given HDFS is healthy. (szetszwo) + + HDFS-3113. httpfs does not support delegation tokens. (tucu) + + HDFS-3513. HttpFS should cache filesystems. (tucu) + + HDFS-3637. Add support for encrypting the DataTransferProtocol. (atm) + + HDFS-3150. Add option for clients to contact DNs via hostname. (eli) + + HDFS-2793. Add an admin command to trigger an edit log roll. (todd) + + HDFS-3703. Datanodes are marked stale if heartbeat is not received in + configured timeout and are selected as the last location to read from. + (Jing Zhao via suresh) + + IMPROVEMENTS + + HDFS-3040. TestMulitipleNNDataBlockScanner is misspelled. (Madhukara Phatak + via atm) + + HDFS-3390. DFSAdmin should print full stack traces of errors when DEBUG + logging is enabled. (atm) + + HDFS-3341. Change minimum RPC versions to respective SNAPSHOTs instead of + final releases. (todd) + + HDFS-3369. Rename {get|set|add}INode(..) 
methods in BlockManager and + BlocksMap to {get|set|add}BlockCollection(..). (John George via szetszwo) + + HDFS-3134. harden edit log loader against malformed or malicious input. + (Colin Patrick McCabe via eli) + + HDFS-3230. Cleanup DatanodeID creation in the tests. (eli) + + HDFS-3401. Cleanup DatanodeDescriptor creation in the tests. (eli) + + HDFS-3400. DNs should be able start with jsvc even if security is disabled. + (atm via eli) + + HDFS-3404. Make putImage in GetImageServlet infer remote address to fetch + from request. (atm) + + HDFS-3335. check for edit log corruption at the end of the log + (Colin Patrick McCabe via todd) + + HDFS-3417. Rename BalancerDatanode#getName to getDisplayName to be + consistent with Datanode. (eli) + + HDFS-3416. Cleanup DatanodeID and DatanodeRegistration + constructors used by testing. (eli) + + HDFS-3419. Cleanup LocatedBlock. (eli) + + HDFS-3440. More effectively limit stream memory consumption when reading + corrupt edit logs (Colin Patrick McCabe via todd) + + HDFS-3438. BootstrapStandby should not require a rollEdits on active node + (todd) + + HDFS-2885. Remove "federation" from the nameservice config options. + (Tsz Wo (Nicholas) Sze via eli) + + HDFS-3394. Do not use generic in INodeFile.getLastBlock(): the run-time + ClassCastException check is useless since generic type information is only + available in compile-time. (szetszwo) + + HDFS-3454. Balancer unconditionally logs InterruptedException at + INFO level on shutdown if security is enabled. (eli) + + HDFS-1013. Miscellaneous improvements to HTML markup for web UIs + (Eugene Koontz via todd) + + HDFS-3052. Change INodeFile and INodeFileUnderConstruction to package + private. (szetszwo) + + HDFS-3520. Add transfer rate logging to TransferFsImage. (eli) + + HDFS-3504. Support configurable retry policy in DFSClient for RPC + connections and RPC calls, and add MultipleLinearRandomRetry, a new retry + policy. (szetszwo) + + HDFS-3372. offlineEditsViewer should be able to read a binary + edits file with recovery mode. (Colin Patrick McCabe via eli) + + HDFS-3516. Check content-type in WebHdfsFileSystem. (szetszwo) + + HDFS-3535. Audit logging should log denied accesses. (Andy Isaacson via eli) + + HDFS-3481. Refactor HttpFS handling of JAX-RS query string parameters (tucu) + + HDFS-3572. Cleanup code which inits SPNEGO in HttpServer (todd) + + HDFS-3475. Make the replication monitor multipliers configurable. + (harsh via eli) + + HDFS-3343. Improve metrics for DN read latency (Andrew Wang via todd) + + HDFS-3170. Add more useful metrics for write latency (Matthew Jacobs via + todd) + + HDFS-3604. Add dfs.webhdfs.enabled to hdfs-default.xml. (eli) + + HDFS-2988. Improve error message when storage directory lock fails + (Miomir Boljanovic via harsh) + + HDFS-2391. Newly set BalancerBandwidth value is not displayed anywhere. + (harsh) + + HDFS-3067. NPE in DFSInputStream.readBuffer if read is repeated on + corrupted block. (Henry Robinson via atm) + + HDFS-3555. idle client socket triggers DN ERROR log + (should be INFO or DEBUG). (Andy Isaacson via harsh) + + HDFS-3568. fuse_dfs: add support for security. (Colin McCabe via atm) + + HDFS-3629. Fix the typo in the error message about inconsistent + storage layout version. (Brandon Li via harsh) + + HDFS-3613. GSet prints some INFO level values, which aren't + really very useful to all (Andrew Wang via harsh) + + HDFS-3611. NameNode prints unnecessary WARNs about edit log normally skipping + a few bytes. 
(Colin Patrick McCabe via harsh) + + HDFS-3582. Hook daemon process exit for testing. (eli) + + HDFS-3641. Move server Util time methods to common and use now + instead of System#currentTimeMillis. (eli) + + HDFS-3633. libhdfs: hdfsDelete should pass JNI_FALSE or JNI_TRUE. + (Colin Patrick McCabe via eli) + + HDFS-799. libhdfs must call DetachCurrentThread when a thread is destroyed. + (Colin Patrick McCabe via eli) + + HDFS-3306. fuse_dfs: don't lock release operations. + (Colin Patrick McCabe via eli) + + HDFS-3612. Single namenode image directory config warning can + be improved. (Andy Isaacson via harsh) + + HDFS-3606. libhdfs: create self-contained unit test. + (Colin Patrick McCabe via eli) + + HDFS-3539. libhdfs code cleanups. (Colin Patrick McCabe via eli) + + HDFS-3610. fuse_dfs: Provide a way to use the default (configured) NN URI. + (Colin Patrick McCabe via eli) + + HDFS-3663. MiniDFSCluster should capture the code path that led to + the first ExitException. (eli) + + HDFS-3659. Add missing @Override to methods across the hadoop-hdfs + project. (Brandon Li via harsh) + + HDFS-3537. Move libhdfs and fuse-dfs source to native subdirectories. + (Colin Patrick McCabe via eli) + + HDFS-3665. Add a test for renaming across file systems via a symlink. (eli) + + HDFS-3666. Plumb more exception messages to terminate. (eli) + + HDFS-3673. libhdfs: fix some compiler warnings. (Colin Patrick McCabe via eli) + + HDFS-3675. libhdfs: follow documented return codes. (Colin Patrick McCabe via eli) + + HDFS-1249. With fuse-dfs, chown which only has owner (or only group) + argument fails with Input/output error. (Colin Patrick McCabe via eli) + + HDFS-3583. Convert remaining tests to Junit4. (Andrew Wang via atm) + + HDFS-3711. Manually convert remaining tests to JUnit4. (Andrew Wang via atm) + + HDFS-3650. Use MutableQuantiles to provide latency histograms for various + operations. (Andrew Wang via atm) + + HDFS-3667. Add retry support to WebHdfsFileSystem. (szetszwo) + + HDFS-3291. add test that covers HttpFS working w/ a non-HDFS Hadoop + filesystem (tucu) + + HDFS-3634. Add self-contained, mavenized fuse_dfs test. (Colin Patrick + McCabe via atm) + + HDFS-3190. Simple refactors in existing NN code to assist + QuorumJournalManager extension. (todd) + + HDFS-3276. initializeSharedEdits should have a -nonInteractive flag (todd) + + HDFS-3765. namenode -initializeSharedEdits should be able to initialize + all shared storages. (Vinay and todd via todd) + + HDFS-3723. Add support -h, -help to all the commands. (Jing Zhao via + suresh) + + HDFS-3803. Change BlockPoolSliceScanner chatty INFO log to DEBUG. + (Andrew Purtell via suresh) + + HDFS-3802. StartupOption.name in HdfsServerConstants should be final. + (Jing Zhao via szetszwo) + + HDFS-3796. Speed up edit log tests by avoiding fsync() (todd) + + HDFS-2963. Console Output is confusing while executing metasave + (dfsadmin command). (Andrew Wang via eli) + + HDFS-3672. Expose disk-location information for blocks to enable better + scheduling. (Andrew Wang via atm) + + HDFS-2727. libhdfs should get the default block size from the server. + (Colin Patrick McCabe via eli) + + HDFS-3832. Remove protocol methods related to DistributedUpgrade. (suresh) + + HDFS-3819. Should check whether invalidate work percentage default value is + not greater than 1.0f. (Jing Zhao via jitendra) + + HDFS-3177. Update DFSClient and DataXceiver to handle different checkum + types in file checksum computation. (Kihwal Lee via szetszwo) + + HDFS-3844. 
Add @Override and remove {@inheritdoc} and unnecessary + imports. (Jing Zhao via suresh) + + HDFS-3853. Port MiniDFSCluster enableManagedDfsDirsRedundancy + option to branch-2. (Colin Patrick McCabe via eli) + + HDFS-3871. Change NameNodeProxies to use RetryUtils. (Arun C Murthy + via szetszwo) + + HDFS-3887. Remove redundant chooseTarget methods in BlockPlacementPolicy. + (Jing Zhao via szetszwo) + + HDFS-3888. Clean up BlockPlacementPolicyDefault. (Jing Zhao via szetszwo) + + HDFS-3907. Allow multiple users for local block readers. (eli) + + HDFS-3510. Editlog pre-allocation is performed prior to writing edits + to avoid partial edits case disk out of space. (Colin McCabe via todd) + + HDFS-3910. DFSTestUtil#waitReplication should timeout. (eli) + + HDFS-3920. libwebdhfs string processing and using strerror consistently + to handle all errors. (Jing Zhao via suresh) + + OPTIMIZATIONS + + HDFS-2982. Startup performance suffers when there are many edit log + segments. (Colin Patrick McCabe via todd) + + HDFS-2834. Add a ByteBuffer-based read API to DFSInputStream. + (Henry Robinson via todd) + + HDFS-3110. Use directRead API to reduce the number of buffer copies in + libhdfs (Henry Robinson via todd) + + HDFS-3697. Enable fadvise readahead by default. (todd) + + HDFS-2421. Improve the concurrency of SerialNumberMap in NameNode. + (Jing Zhao and Weiyan Wang via szetszwo) + + HDFS-3866. HttpFS POM should have property where to download tomcat from (zero45 via tucu) + + BUG FIXES + + HDFS-3385. The last block of INodeFileUnderConstruction is not + necessarily a BlockInfoUnderConstruction, so do not cast it in + FSNamesystem.recoverLeaseInternal(..). (szetszwo) + + HDFS-3414. Balancer does not find NameNode if rpc-address or + servicerpc-address are not set in client configs. (atm) + + HDFS-3031. Fix complete() and getAdditionalBlock() RPCs to be idempotent + (todd) + + HDFS-2759. Pre-allocate HDFS edit log files after writing version number. + (atm) + + HDFS-3413. TestFailureToReadEdits timing out. (atm) + + HDFS-3422. TestStandbyIsHot timeouts too aggressive (todd) + + HDFS-3433. GetImageServlet should allow administrative requestors when + security is enabled. (atm) + + HDFS-1153. dfsnodelist.jsp should handle invalid input parameters. + (Ravi Phulari via eli) + + HDFS-3434. InvalidProtocolBufferException when visiting DN + browseDirectory.jsp (eli) + + HDFS-2800. Fix cancellation of checkpoints in the standby node to be more + reliable. (todd) + + HDFS-3391. Fix InvalidateBlocks to compare blocks including their + generation stamps. (todd) + + HDFS-3444. hdfs groups command doesn't work with security enabled. (atm) + + HDFS-2717. BookKeeper Journal output stream doesn't check addComplete rc. + (Ivan Kelly via umamahesh) + + HDFS-3415. Make sure all layout versions are the same for all storage + directories in the Namenode. (Brandon Li via szetszwo) + + HDFS-3436. In DataNode.transferReplicaForPipelineRecovery(..), it should + use the stored generation stamp to check if the block is valid. (Vinay + via szetszwo) + + HDFS-3460. HttpFS proxyuser validation with Kerberos ON uses full + principal name. (tucu) + + HDFS-3058. HA: Bring BookKeeperJournalManager up to date with HA changes. + (Ivan Kelly via umamahesh) + + HDFS-3368. Missing blocks due to bad DataNodes coming up and down. (shv) + + HDFS-3452. BKJM:Switch from standby to active fails and NN gets shut down + due to delay in clearing of lock. (umamahesh) + + HDFS-3398. 
Client will not retry when primaryDN is down once it's just got pipeline. + (Amith D K via umamahesh) + + HDFS-3474. Cleanup Exception handling in BookKeeper journal manager. + (Ivan Kelly via umamahesh) + + HDFS-3468. Make BKJM-ZK session timeout configurable. (umamahesh) + + HDFS-3423. BKJM: NN startup is failing, when tries to recoverUnfinalizedSegments() + a bad inProgress_ ZNodes. (Ivan Kelly and Uma via umamahesh) + + HDFS-3441. Race condition between rolling logs at active NN and purging at standby. + (Rakesh R via umamahesh) + + HDFS-3484. hdfs fsck doesn't work if NN HTTP address is set to + 0.0.0.0 even if NN RPC address is configured. (atm via eli) + + HDFS-3486. offlineimageviewer can't read fsimage files that contain + persistent delegation tokens. (Colin Patrick McCabe via eli) + + HDFS-3487. offlineimageviewer should give byte offset information + when it encounters an exception. (Colin Patrick McCabe via eli) + + HDFS-3442. Incorrect count for Missing Replicas in FSCK report. (Andrew + Wang via atm) + + HDFS-2025. Go Back to File View link is not working in tail.jsp. + (Ashish and Sravan via umamahesh) + + HDFS-3501. Checkpointing with security enabled will stop working + after ticket lifetime expires. (atm via eli) + + HDFS-3266. DFSTestUtil#waitCorruptReplicas doesn't sleep between checks. + (Madhukara Phatak via atm) + + HDFS-3505. DirectoryScanner does not join all threads in shutdown. + (Colin Patrick McCabe via eli) + + HDFS-3485. DataTransferThrottler will over-throttle when currentTimeMillis + jumps (Andy Isaacson via todd) + + HDFS-2914. HA: Standby should not enter safemode when resources are low. + (Vinay via atm) + + HDFS-3235. MiniDFSClusterManager doesn't correctly support -format option. + (Henry Robinson via atm) + + HDFS-3514. Add missing TestParallelLocalRead. (Henry Robinson via atm) + + HDFS-3243. TestParallelRead timing out on jenkins. (Henry Robinson via todd) + + HDFS-3490. DatanodeWebHdfsMethods throws NullPointerException if + NamenodeRpcAddressParam is not set. (szetszwo) + + HDFS-2797. Fix misuses of InputStream#skip in the edit log code. + (Colin Patrick McCabe via eli) + + HDFS-3517. TestStartup should bind ephemeral ports. (eli) + + HDFS-3522. If a namenode is in safemode, it should throw SafeModeException + when getBlockLocations has zero locations. (Brandon Li via szetszwo) + + HDFS-3408. BKJM : Namenode format fails, if there is no BK root. (Rakesh R via umamahesh) + + HDFS-3389. Document the BKJM usage in Namenode HA. (umamahesh and Ivan Kelly via umamahesh) + + HDFS-3531. EditLogFileOutputStream#preallocate should check for + incomplete writes. (Colin Patrick McCabe via eli) + + HDFS-766. Error message not clear for set space quota out of boundary + values. (Jon Zuanich via atm) + + HDFS-3480. Multiple SLF4J binding warning. (Vinay via eli) + + HDFS-3524. Update TestFileLengthOnClusterRestart for HDFS-3522. (Brandon + Li via szetszwo) + + HDFS-3559. DFSTestUtil: use Builder class to construct DFSTestUtil + instances. (Colin Patrick McCabe via atm) + + HDFS-3551. WebHDFS CREATE should use client location for HTTP redirection. + (szetszwo) + + HDFS-3157. Fix a bug in the case that the generation stamps of the stored + block in a namenode and the reported block from a datanode do not match. + (Ashish Singhi via szetszwo) + + HDFS-3575. HttpFS does not log Exception Stacktraces (brocknoland via tucu) + + HDFS-3574. Fix small race and do some cleanup in GetImageServlet (todd) + + HDFS-3581. 
FSPermissionChecker#checkPermission sticky bit check + missing range check. (eli) + + HDFS-3541. Deadlock between recovery, xceiver and packet responder. + (Vinay via umamahesh) + + HDFS-3428. Move DelegationTokenRenewer to common (tucu) + + HDFS-3491. HttpFs does not set permissions correctly (tucu) + + HDFS-3580. incompatible types; no instance(s) of type variable(s) V exist + so that V conforms to boolean compiling HttpFSServer.java with OpenJDK + (adi2 via tucu) + + HDFS-3603. Decouple TestHDFSTrash from TestTrash. (Jason Lowe via eli) + + HDFS-711. hdfsUtime does not handle atime = 0 or mtime = 0 correctly. + (Colin Patrick McCabe via eli) + + HDFS-3548. NamenodeFsck.copyBlock fails to create a Block Reader. + (Colin Patrick McCabe via eli) + + HDFS-3615. Two BlockTokenSecretManager findbugs warnings. (atm) + + HDFS-470. libhdfs should handle 0-length reads from FSInputStream + correctly. (Colin Patrick McCabe via eli) + + HDFS-3492. fix some misuses of InputStream#skip. + (Colin Patrick McCabe via eli) + + HDFS-3609. libhdfs: don't force the URI to look like hdfs://hostname:port. + (Colin Patrick McCabe via eli) + + HDFS-2966 TestNameNodeMetrics tests can fail under load. (stevel) + + HDFS-3605. Block mistakenly marked corrupt during edit log catchup + phase of failover. (todd and Brahma Reddy Battula via todd) + + HDFS-3690. BlockPlacementPolicyDefault incorrectly casts LOG. (eli) + + HDFS-3597. SNN fails to start after DFS upgrade. (Andy Isaacson via todd) + + HDFS-3608. fuse_dfs: detect changes in UID ticket cache. (Colin Patrick + McCabe via atm) + + HDFS-3709. TestStartup tests still binding to the ephemeral port. (eli) + + HDFS-3720. hdfs.h must get packaged. (Colin Patrick McCabe via atm) + + HDFS-3626. Creating file with invalid path can corrupt edit log (todd) + + HDFS-3679. fuse_dfs notrash option sets usetrash. (Conrad Meyer via suresh) + + HDFS-3732. fuse_dfs: incorrect configuration value checked for connection + expiry timer period. (Colin Patrick McCabe via atm) + + HDFS-3738. TestDFSClientRetries#testFailuresArePerOperation sets incorrect + timeout config. (atm) + + HDFS-3756. DelegationTokenFetcher creates 2 HTTP connections, the second + one not properly configured. (tucu) + + HDFS-3579. libhdfs: fix exception handling. (Colin Patrick McCabe via atm) + + HDFS-3754. BlockSender doesn't shutdown ReadaheadPool threads. (eli) + + HDFS-3760. primitiveCreate is a write, not a read. (Andy Isaacson via atm) + + HDFS-3710. libhdfs misuses O_RDONLY/WRONLY/RDWR. (Andy Isaacson via atm) + + HDFS-3721. hsync support broke wire compatibility. (todd and atm) + + HDFS-3758. TestFuseDFS test failing. (Colin Patrick McCabe via eli) + + HDFS-2330. In NNStorage.java, IOExceptions of stream closures can mask + root exceptions. (umamahesh via todd) + + HDFS-3790. test_fuse_dfs.c doesn't compile on centos 5. (Colin Patrick + McCabe via atm) + + HDFS-3658. Fix bugs in TestDFSClientRetries and add more tests. (szetszwo) + + HDFS-3794. WebHDFS OPEN returns the incorrect Content-Length in the HTTP + header when offset is specified and length is omitted. + (Ravi Prakash via szetszwo) + + HDFS-3048. Small race in BlockManager#close. (Andy Isaacson via eli) + + HDFS-3194. DataNode block scanner is running too frequently. + (Andy Isaacson via eli) + + HDFS-3808. fuse_dfs: postpone libhdfs intialization until after fork. + (Colin Patrick McCabe via atm) + + HDFS-3788. ByteRangeInputStream should not expect HTTP Content-Length header + when chunked transfer-encoding is used. 
(szetszwo) + + HDFS-3816. Invalidate work percentage default value should be 0.32f + instead of 32. (Jing Zhao via suresh) + + HDFS-3707. TestFSInputChecker: improper use of skip. + (Colin Patrick McCabe via eli) + + HDFS-3830. test_libhdfs_threaded: use forceNewInstance. + (Colin Patrick McCabe via eli) + + HDFS-3835. Long-lived 2NN cannot perform a checkpoint if security is + enabled and the NN restarts with outstanding delegation tokens. (atm) + + HDFS-3715. Fix TestFileCreation#testFileCreationNamenodeRestart. + (Andrew Whang via eli) + + HDFS-3683. Edit log replay progress indicator shows >100% complete. (Plamen + Jeliazkov via atm) + + HDFS-3731. Release upgrade must handle blocks being written from 1.0. + (Colin Patrick McCabe via eli) + + HDFS-3856. TestHDFSServerPorts failure is causing surefire fork failure. + (eli) + + HDFS-3860. HeartbeatManager#Monitor may wrongly hold the writelock of + namesystem. (Jing Zhao via atm) + + HDFS-3849. When re-loading the FSImage, we should clear the existing + genStamp and leases. (Colin Patrick McCabe via atm) + + HDFS-3864. NN does not update internal file mtime for OP_CLOSE when reading + from the edit log. (atm) + + HDFS-3837. Fix DataNode.recoverBlock findbugs warning. (eli) + + HDFS-3733. Audit logs should include WebHDFS access. (Andy Isaacson via + eli) + + HDFS-2686. Remove DistributedUpgrade related code. (suresh) + + HDFS-3833. TestDFSShell fails on windows due to concurrent file + read/write. (Brandon Li via suresh) + + HDFS-3466. Get HTTP kerberos principal from the web authentication keytab. + (omalley) + + HDFS-3879. Fix findbugs warning in TransferFsImage on branch-2. (eli) + + HDFS-3469. start-dfs.sh will start zkfc, but stop-dfs.sh will not stop zkfc similarly. + (Vinay via umamahesh) + + HDFS-1490. TransferFSImage should timeout (Dmytro Molkov and Vinay via todd) + + HDFS-3828. Block Scanner rescans blocks too frequently. + (Andy Isaacson via eli) + + HDFS-3895. hadoop-client must include commons-cli (tucu) + + HDFS-2757. Cannot read a local block that's being written to when + using the local read short circuit. (Jean-Daniel Cryans via eli) + + HDFS-3664. BlockManager race when stopping active services. + (Colin Patrick McCabe via eli) + + HDFS-3928. MiniDFSCluster should reset the first ExitException on shutdown. (eli) + + HDFS-3938. remove current limitations from HttpFS docs. (tucu) + + HDFS-3944. Httpfs resolveAuthority() is not resolving host correctly. (tucu) + + HDFS-3972. Trash emptier fails in secure HA cluster. (todd via eli) + + HDFS-4443. Remove a trailing '`' character from the HTML code generated by + NamenodeJspHelper.generateNodeData(..). (Christian Rohling via szetszwo) + + BREAKDOWN OF HDFS-3042 SUBTASKS + + HDFS-2185. HDFS portion of ZK-based FailoverController (todd) + + HDFS-3200. Scope all ZKFC configurations by nameservice (todd) + + HDFS-3223. add zkfc to hadoop-daemon.sh script (todd) + + HDFS-3261. TestHASafeMode fails on HDFS-3042 branch (todd) + + HDFS-3159. Document NN auto-failover setup and configuration (todd) + + HDFS-3412. Fix findbugs warnings in auto-HA branch (todd) + + HDFS-3432. TestDFSZKFailoverController tries to fail over too early (todd) + + HDFS-3902. TestDatanodeBlockScanner#testBlockCorruptionPolicy is broken. + (Andy Isaacson via eli) + +Release 2.0.0-alpha - 05-23-2012 + + INCOMPATIBLE CHANGES + + HDFS-2676. Remove Avro RPC. (suresh) + + HDFS-2303. Unbundle jsvc. (Roman Shaposhnik and Mingjie Lai via eli) + + HDFS-3137. Bump LAST_UPGRADABLE_LAYOUT_VERSION to -16. 
(eli) + + HDFS-3138. Move DatanodeInfo#ipcPort to DatanodeID. (eli) + + HDFS-3164. Move DatanodeInfo#hostName to DatanodeID. (eli) + + NEW FEATURES + + HDFS-2978. The NameNode should expose name dir statuses via JMX. (atm) + + HDFS-395. DFS Scalability: Incremental block reports. (Tomasz Nykiel + via hairong) + + HDFS-2517. Add protobuf service for JournalProtocol. (suresh) + + HDFS-2518. Add protobuf service for NamenodeProtocol. (suresh) + + HDFS-2520. Add protobuf service for InterDatanodeProtocol. (suresh) + + HDFS-2519. Add protobuf service for DatanodeProtocol. (suresh) + + HDFS-2581. Implement protobuf service for JournalProtocol. (suresh) + + HDFS-2618. Implement protobuf service for NamenodeProtocol. (suresh) + + HDFS-2629. Implement protobuf service for InterDatanodeProtocol. (suresh) + + HDFS-2636. Implement protobuf service for ClientDatanodeProtocol. (suresh) + + HDFS-2642. Protobuf translators for DatanodeProtocol. (jitendra) + + HDFS-2647. Used protobuf based RPC for InterDatanodeProtocol, + ClientDatanodeProtocol, JournalProtocol, NamenodeProtocol. (suresh) + + HDFS-2661. Enable protobuf RPC for DatanodeProtocol. (jitendra) + + HDFS-2697. Move RefreshAuthPolicy, RefreshUserMappings, GetUserMappings + protocol to protocol buffers. (jitendra) + + HDFS-2880. Protobuf changes in DatanodeProtocol to add multiple storages. + (suresh) + + HDFS-2899. Service protocol changes in DatanodeProtocol to add multiple + storages. (suresh) + + HDFS-2430. The number of failed or low-resource volumes the NN can tolerate + should be configurable. (atm) + + HDFS-1623. High Availability Framework for HDFS NN. Contributed by Todd + Lipcon, Aaron T. Myers, Eli Collins, Uma Maheswara Rao G, Bikas Saha, + Suresh Srinivas, Jitendra Nath Pandey, Hari Mankude, Brandon Li, Sanjay + Radia, Mingjie Lai, and Gregory Chanan + + HDFS-2941. Add an administrative command to download a copy of the fsimage + from the NN. (atm) + + HDFS-2413. Add an API DistributedFileSystem.isInSafeMode() and change + DistributedFileSystem to @InterfaceAudience.LimitedPrivate. + (harsh via szetszwo) + + HDFS-3167. CLI-based driver for MiniDFSCluster. (Henry Robinson via atm) + + HDFS-3148. The client should be able to use multiple local interfaces + for data transfer. (eli) + + HDFS-3000. Add a public API for setting quotas. (atm) + + HDFS-3102. Add CLI tool to initialize the shared-edits dir. (atm) + + HDFS-3004. Implement Recovery Mode. (Colin Patrick McCabe via eli) + + HDFS-3282. Add HdfsDataInputStream as a public API. (umamahesh) + + HDFS-3298. Add HdfsDataOutputStream as a public API. (szetszwo) + + HDFS-234. Integration with BookKeeper logging system. (Ivan Kelly + via umamahesh) + + IMPROVEMENTS + + HDFS-2018. Move all journal stream management code into one place. + (Ivan Kelly via jitendra) + + HDFS-2223. Untangle dependencies between NN components (todd) + + HDFS-2351. Change Namenode and Datanode to register each of their protocols + separately (sanjay) + + HDFS-2337. DFSClient shouldn't keep multiple RPC proxy references (atm) + + HDFS-2181. Separate HDFS Client wire protocol data types (sanjay) + + HDFS-2459. Separate datatypes for Journal Protocol. (suresh) + + HDFS-2480. Separate datatypes for NamenodeProtocol. (suresh) + + HDFS-2489. Move Finalize and Register to separate file out of + DatanodeCommand.java. (suresh) + + HDFS-2488. Separate datatypes for InterDatanodeProtocol. (suresh) + + HDFS-2496. Separate datatypes for DatanodeProtocol. (suresh) + + HDFS-2479.
HDFS Client Data Types in Protocol Buffers (sanjay) + + HADOOP-7862. Hdfs changes to work with HADOOP-7862: Move the support for + multiple protocols to lower layer so that Writable, PB and Avro can all + use it. (sanjay) + + HDFS-2597. ClientNameNodeProtocol in Protocol Buffers. (sanjay) + + HDFS-2651. ClientNameNodeProtocol Translators for Protocol Buffers. (sanjay) + + HDFS-2650. Replace @inheritDoc with @Override. (Hari Mankude via suresh). + + HDFS-2669. Enable protobuf rpc for ClientNamenodeProtocol. (sanjay) + + HDFS-2801. Provide a method in client side translators to check for a + methods supported in underlying protocol. (jitendra) + + HDFS-2895. Remove Writable wire protocol types and translators to + complete transition to protocol buffers. (suresh) + + HDFS-2992. Edit log failure trace should include transaction ID of + error. (Colin Patrick McCabe via eli) + + HDFS-3030. Remove getProtocolVersion and getProtocolSignature from + translators. (jitendra) + + HDFS-2158. Add JournalSet to manage the set of journals. (jitendra) + + HDFS-2334. Add Closeable to JournalManager. (Ivan Kelly via jitendra) + + HDFS-1580. Add interface for generic Write Ahead Logging mechanisms. + (Ivan Kelly via jitendra) + + HDFS-3060. Bump TestDistributedUpgrade#testDistributedUpgrade timeout (eli) + + HDFS-2410. Further cleanup of hardcoded configuration keys and values. + (suresh) + + HDFS-2878. Fix TestBlockRecovery and move it back into main test directory. + (todd) + + HDFS-3003. Remove getHostPortString() from NameNode, replace it with + NetUtils.getHostPortString(). (Brandon Li via atm) + + HDFS-3014. FSEditLogOp and its subclasses should have toString() method. + (Sho Shimauchi via atm) + + HDFS-3021. Use generic type to declare FSDatasetInterface. (szetszwo) + + HDFS-3056. Add a new interface RollingLogs for DataBlockScanner logging. + (szetszwo) + + HDFS-2731. Add command to bootstrap the Standby Node's name directories + from the Active NameNode. (todd) + + HDFS-3082. Clean up FSDatasetInterface and change DataNode.data to package + private. (szetszwo) + + HDFS-3057. httpfs and hdfs launcher scripts should honor CATALINA_HOME + and HADOOP_LIBEXEC_DIR (rvs via tucu) + + HDFS-3088. Move FSDatasetInterface inner classes to a package. (szetszwo) + + HDFS-3111. Missing license headers in trunk. (umamahesh) + + HDFS-3091. Update the usage limitations of ReplaceDatanodeOnFailure policy in + the config description for the smaller clusters. (szetszwo via umamahesh) + + HDFS-3105. Add DatanodeStorage information to block recovery. (szetszwo) + + HDFS-3086. Change Datanode not to send storage list in registration. + (szetszwo) + + HDFS-309. FSEditLog should log progress during replay. (Sho Shimauchi + via todd) + + HDFS-3044. fsck move should be non-destructive by default. + (Colin Patrick McCabe via eli) + + HDFS-3071. haadmin failover command does not provide enough detail when + target NN is not ready to be active. (todd) + + HDFS-3089. Move FSDatasetInterface and the related classes to a package. + (szetszwo) + + HDFS-3129. NetworkTopology: add test that getLeaf should check for + invalid topologies. (Colin Patrick McCabe via eli) + + HDFS-3155. Clean up FSDataset implemenation related code. (szetszwo) + + HDFS-3158. LiveNodes member of NameNodeMXBean should list non-DFS used + space and capacity per DN. (atm) + + HDFS-3172. dfs.upgrade.permission is dead code. (eli) + + HDFS-3171. The DatanodeID "name" field is overloaded. (eli) + + HDFS-3144. Refactor DatanodeID#getName by use. 
(eli) + + HDFS-3130. Move fsdataset implementation to a package. (szetszwo) + + HDFS-3120. Enable hsync and hflush by default. (eli) + + HDFS-3187. Upgrade guava to 11.0.2 (todd) + + HDFS-3168. Remove unnecessary "throw IOException" and change fields to + final in FSNamesystem and BlockManager. (szetszwo) + + HDFS-2564. Cleanup unnecessary exceptions thrown and unnecessary casts. + (Hari Mankude via eli) + + HDFS-3084. FenceMethod.tryFence() and ShellCommandFencer should pass + namenodeId as well as host:port (todd) + + HDFS-3050. rework OEV to share more code with the NameNode. + (Colin Patrick McCabe via eli) + + HDFS-3204. Minor modification to JournalProtocol.proto to make + it generic. (suresh) + + HDFS-3226. Allow GetConf tool to print arbitrary keys (todd) + + HDFS-3240. Drop log level of "heartbeat: ..." in BPServiceActor to DEBUG + (todd) + + HDFS-3238. ServerCommand and friends don't need to be writables. (eli) + + HDFS-3094. add -nonInteractive and -force option to namenode -format + command (Arpit Gupta via todd) + + HDFS-3244. Remove dead writable code from hdfs/protocol. (eli) + + HDFS-3247. Improve bootstrapStandby behavior when original NN is not active + (todd) + + HDFS-3249. Use ToolRunner.confirmPrompt in NameNode (todd) + + HDFS-3179. Improve the exception message thrown by DataStreamer when + it failed to add a datanode. (szetszwo) + + HDFS-2983. Relax the build version check to permit rolling upgrades within a release. (atm) + + HDFS-3259. NameNode#initializeSharedEdits should populate shared edits dir + with edit log segments. (atm) + + HDFS-2708. Stats for the # of blocks per DN. (atm) + + HDFS-3279. Move the FSEditLog constructor with @VisibleForTesting to + TestEditLog. (Arpit Gupta via szetszwo) + + HDFS-3294. Fix code indentation in NamenodeWebHdfsMethods and + DatanodeWebHdfsMethods. (szetszwo) + + HDFS-3263. HttpFS should read HDFS config from Hadoop site.xml files (tucu) + + HDFS-3206. Miscellaneous xml cleanups for OEV. + (Colin Patrick McCabe via eli) + + HDFS-3169. TestFsck should test multiple -move operations in a row. + (Colin Patrick McCabe via eli) + + HDFS-3258. Test for HADOOP-8144 (pseudoSortByDistance in + NetworkTopology for first rack local node). (Junping Du via eli) + + HDFS-3322. Use HdfsDataInputStream and HdfsDataOutputStream in Hdfs. + (szetszwo) + + HDFS-3339. Change INode to package private. (John George via szetszwo) + + HDFS-3303. Remove Writable implementation from RemoteEditLogManifest. + (Brandon Li via szetszwo) + + HDFS-2617. Replaced Kerberized SSL for image transfer and fsck + with SPNEGO-based solution. (jghoman, omalley, tucu, and atm via eli) + + HDFS-3365. Enable users to disable socket caching in DFS client + configuration (todd) + + HDFS-3375. Put client name in DataXceiver thread name for readBlock + and keepalive (todd) + + HDFS-3363. Define BlockCollection and MutableBlockCollection interfaces + so that INodeFile and INodeFileUnderConstruction do not have to be used in + block management. (John George via szetszwo) + + HDFS-3211. Add fence(..) and replace NamenodeRegistration with JournalInfo + and epoch in JournalProtocol. (suresh via szetszwo) + + HADOOP-8285 HDFS changes for Use ProtoBuf for RpcPayLoadHeader (sanjay radia) + + HDFS-3418. Rename BlockWithLocationsProto datanodeIDs field to storageIDs. + (eli) + + OPTIMIZATIONS + + HDFS-2477. Optimize computing the diff between a block report and the + namenode state. (Tomasz Nykiel via hairong) + + HDFS-2495. 
Increase granularity of write operations in ReplicationMonitor + thus reducing contention for write lock. (Tomasz Nykiel via hairong) + + HDFS-2476. More CPU efficient data structure for under-replicated, + over-replicated, and invalidated blocks. (Tomasz Nykiel via todd) + + HDFS-3036. Remove unused method DFSUtil#isDefaultNamenodeAddress. (atm) + + HDFS-3378. Remove DFS_NAMENODE_SECONDARY_HTTPS_PORT_KEY and DEFAULT. (eli) + + BUG FIXES + + HDFS-2481. Unknown protocol: org.apache.hadoop.hdfs.protocol.ClientProtocol. + (sanjay) + + HDFS-2497. Fix TestBackupNode failure. (suresh) + + HDFS-2499. RPC client is created incorrectly introduced in HDFS-2459. + (suresh) + + HDFS-2526. (Client)NamenodeProtocolTranslatorR23 do not need to keep a + reference to rpcProxyWithoutRetry (atm) + + HDFS-2532. TestDfsOverAvroRpc timing out in trunk (Uma Maheswara Rao G + via todd) + + HDFS-2666. Fix TestBackupNode failure. (suresh) + + HDFS-2663. Optional protobuf parameters are not handled correctly. (suresh) + + HDFS-2694. Removal of Avro broke non-PB NN services. (atm) + + HDFS-2687. Tests failing with ClassCastException post protobuf RPC + changes. (suresh) + + HDFS-2700. Fix failing TestDataNodeMultipleRegistrations in trunk + (Uma Maheswara Rao G via todd) + + HDFS-2739. SecondaryNameNode doesn't start up. (jitendra) + + HDFS-2768. BackupNode stop can not close proxy connections because + it is not a proxy instance. (Uma Maheswara Rao G via eli) + + HDFS-2968. Protocol translator for BlockRecoveryCommand broken when + multiple blocks need recovery. (todd) + + HDFS-3020. Fix editlog to automatically sync when buffer is full. (todd) + + HDFS-3038. Add FSEditLog.metrics to findbugs exclude list. (todd via atm) + + HDFS-2188. Make FSEditLog create its journals from a list of URIs rather + than NNStorage. (Ivan Kelly via jitendra) + + HDFS-1765. Block Replication should respect under-replication + block priority. (Uma Maheswara Rao G via eli) + + HDFS-2285. BackupNode should reject requests to modify namespace. + (shv and Uma Maheswara Rao) + + HDFS-2764. TestBackupNode is racy. (atm) + + HDFS-3093. Fix bug where namenode -format interpreted the -force flag in + reverse. (todd) + + HDFS-3005. FSVolume.decDfsUsed(..) should be synchronized. (szetszwo) + + HDFS-3099. SecondaryNameNode does not properly initialize metrics system. + (atm) + + HDFS-3062. Fix bug which prevented MR job submission from creating + delegation tokens on an HA cluster. (Mingjie Lai via todd) + + HDFS-3083. Cannot run an MR job with HA and security enabled when + second-listed NN active. (atm) + + HDFS-3100. In BlockSender, throw an exception when it needs to verify + checksum but the meta data does not exist. (Brandon Li via szetszwo) + + HDFS-3132. Fix findbugs warning on HDFS trunk. (todd) + + HDFS-3156. TestDFSHAAdmin is failing post HADOOP-8202. (atm) + + HDFS-3143. TestGetBlocks.testGetBlocks is failing. (Arpit Gupta via atm) + + HDFS-3142. TestHDFSCLI.testAll is failing. (Brandon Li via atm) + + HDFS-3070. HDFS balancer doesn't ensure that hdfs-site.xml is loaded. (atm) + + HDFS-2995. start-dfs.sh should only start the 2NN for namenodes + with dfs.namenode.secondary.http-address configured. (eli) + + HDFS-3174. Fix assert in TestPendingDataNodeMessages. (eli) + + HDFS-3199. TestValidateConfigurationSettings is failing. (todd via eli) + + HDFS-3202. NamespaceInfo PB translation drops build version. (atm) + + HDFS-3109. Remove hsqldf exclusions from pom.xml. (Ravi Prakash + via suresh) + + HDFS-3210. 
JsonUtil#toJsonMap for for a DatanodeInfo should use + "ipAddr" instead of "name". (eli) + + HDFS-3208. Bogus entries in hosts files are incorrectly displayed + in the report. (eli) + + HDFS-3136. Remove SLF4J dependency as HDFS does not need it to fix + unnecessary warnings. (Jason Lowe via suresh) + + HDFS-3214. InterDatanodeProtocolServerSideTranslatorPB doesn't handle + null response from initReplicaRecovery (todd) + + HDFS-3119. Overreplicated block is not deleted even after the replication + factor is reduced after sync follwed by closing that file. (Ashish Singhi + via umamahesh) + + HDFS-3234. Accidentally left log message in GetConf after HDFS-3226 (todd) + + HDFS-3236. NameNode does not initialize generic conf keys when started + with -initializeSharedEditsDir (atm) + + HDFS-3248. bootstrapStandby repeated twice in hdfs namenode usage message + (Colin Patrick McCabe via todd) + + HDFS-2696. Fix the fuse-fds build. (Bruno Mahé via eli) + + HDFS-3254. Branch-2 build broken due to wrong version number in + fuse-dfs' pom.xml. (Anupam Seth via eli) + + HDFS-3260. TestDatanodeRegistration should set minimum DN version in + addition to minimum NN version. (atm) + + HDFS-3255. HA DFS returns wrong token service (Daryn Sharp via todd) + + HDFS-3256. HDFS considers blocks under-replicated if topology script is + configured with only 1 rack. (atm) + + HDFS-2799. Trim fs.checkpoint.dir values. (Amith D K via eli) + + HDFS-2765. TestNameEditsConfigs is incorrectly swallowing IOE. (atm) + + HDFS-3280. DFSOutputStream.sync should not be synchronized (todd) + + HDFS-3268. FileContext API mishandles token service and incompatible with + HA (Daryn Sharp via todd) + + HDFS-3284. bootstrapStandby fails in secure cluster (todd) + + HDFS-3165. HDFS Balancer scripts are refering to wrong path of + hadoop-daemon.sh (Amith D K via eli) + + HDFS-891. DataNode no longer needs to check for dfs.network.script. + (harsh via eli) + + HDFS-3305. GetImageServlet should consider SBN a valid requestor in a + secure HA setup. (atm) + + HDFS-3314. HttpFS operation for getHomeDirectory is incorrect. (tucu) + + HDFS-3319. Change DFSOutputStream to not to start a thread in constructors. + (szetszwo) + + HDFS-3222. DFSInputStream#openInfo should not silently get the length as 0 + when locations length is zero for last partial block. (umamahesh) + + HDFS-3181. Fix a test case in TestLeaseRecovery2. (szetszwo) + + HDFS-3309. HttpFS (Hoop) chmod not supporting octal and sticky bit + permissions. (tucu) + + HDFS-3326. Append enabled log message uses the wrong variable. + (Matthew Jacobs via eli) + + HDFS-3275. Skip format for non-file based directories. + (Amith D K via umamahesh) + + HDFS-3286. When the threshold value for balancer is zero, unexpected output is displayed. + (Ashish Singhi via umamahesh) + + HDFS-3336. hdfs launcher script will be better off not special casing + namenode command with regards to hadoop.security.logger (rvs via tucu) + + HDFS-3330. If GetImageServlet throws an Error or RTE, response should not + have HTTP "OK" status. (todd) + + HDFS-3351. NameNode#initializeGenericKeys should always set fs.defaultFS + regardless of whether HA or Federation is enabled. (atm) + + HDFS-3332. NullPointerException in DN when directoryscanner is trying to + report bad blocks. (Amith D K via umamahesh) + + HDFS-3359. DFSClient.close should close cached sockets. (todd) + + HDFS-3350. In INode, add final to compareTo(..), equals(..) and hashCode(), + and remove synchronized from updatePermissionStatus(..). 
(szetszwo) + + HDFS-3357. DataXceiver reads from client socket with incorrect/no timeout + (todd) + + HDFS-3376. DFSClient fails to make connection to DN if there are many + unusable cached sockets (todd) + + HDFS-3328. NPE in DataNode.getIpcPort. (eli) + + HDFS-3396. FUSE build fails on Ubuntu 12.04. (Colin Patrick McCabe via eli) + + HDFS-3395. NN doesn't start with HA+security enabled and HTTP address set to 0.0.0.0. (atm) + + HDFS-3026. HA: Handle failure during HA state transition. (atm) + + HDFS-860. fuse-dfs truncate behavior causes issues with scp. + (Brian Bockelman via eli) + + BREAKDOWN OF HDFS-1623 SUBTASKS + + HDFS-2179. Add fencing framework and mechanisms for NameNode HA. (todd) + + HDFS-1974. Introduce active and standy states to the namenode. (suresh) + + HDFS-2407. getServerDefaults and getStats don't check operation category (atm) + + HDFS-1973. HA: HDFS clients must handle namenode failover and switch over to + the new active namenode. (atm) + + HDFS-2301. Start/stop appropriate namenode services when transition to active + and standby states. (suresh) + + HDFS-2231. Configuration changes for HA namenode. (suresh) + + HDFS-2418. Change ConfiguredFailoverProxyProvider to take advantage of + HDFS-2231. (atm) + + HDFS-2393. Mark appropriate methods of ClientProtocol with the idempotent + annotation. (atm) + + HDFS-2523. Small NN fixes to include HAServiceProtocol and prevent NPE on + shutdown. (todd) + + HDFS-2577. NN fails to start since it tries to start secret manager in + safemode. (todd) + + HDFS-2582. Scope dfs.ha.namenodes config by nameservice (todd) + + HDFS-2591. MiniDFSCluster support to mix and match federation with HA (todd) + + HDFS-1975. Support for sharing the namenode state from active to standby. + (jitendra, atm, todd) + + HDFS-1971. Send block report from datanode to both active and standby + namenodes. (sanjay, todd via suresh) + + HDFS-2616. Change DatanodeProtocol#sendHeartbeat() to return HeartbeatResponse. + (suresh) + + HDFS-2622. Fix TestDFSUpgrade in HA branch. (todd) + + HDFS-2612. Handle refreshNameNodes in federated HA clusters (todd) + + HDFS-2623. Add test case for hot standby capability (todd) + + HDFS-2626. BPOfferService.verifyAndSetNamespaceInfo needs to be synchronized + (todd) + + HDFS-2624. ConfiguredFailoverProxyProvider doesn't correctly stop + ProtocolTranslators (todd) + + HDFS-2625. TestDfsOverAvroRpc failing after introduction of HeartbeatResponse + type (todd) + + HDFS-2627. Determine DN's view of which NN is active based on heartbeat + responses (todd) + + HDFS-2634. Standby needs to ingest latest edit logs before transitioning to + active (todd) + + HDFS-2671. NN should throw StandbyException in response to RPCs in STANDBY + state (todd) + + HDFS-2680. DFSClient should construct failover proxy with exponential backoff + (todd) + + HDFS-2683. Authority-based lookup of proxy provider fails if path becomes + canonicalized (todd) + + HDFS-2689. HA: BookKeeperEditLogInputStream doesn't implement isInProgress() + (atm) + + HDFS-2602. NN should log newly-allocated blocks without losing BlockInfo (atm) + + HDFS-2667. Fix transition from active to standby (todd) + + HDFS-2684. Fix up some failing unit tests on HA branch (todd) + + HDFS-2679. Add interface to query current state to HAServiceProtocol (eli via + todd) + + HDFS-2677. Web UI should indicate the NN state. (eli via todd) + + HDFS-2678. When a FailoverProxyProvider is used, DFSClient should not retry + connection ten times before failing over (atm via todd) + + HDFS-2682. 
When a FailoverProxyProvider is used, Client should not retry for 45 + times if it is timing out to connect to server. (Uma Maheswara Rao G via todd) + + HDFS-2693. Fix synchronization issues around state transition (todd) + + HDFS-1972. Fencing mechanism for block invalidations and replications (todd) + + HDFS-2714. Fix test cases which use standalone FSNamesystems (todd) + + HDFS-2692. Fix bugs related to failover from/into safe mode. (todd) + + HDFS-2716. Configuration needs to allow different dfs.http.addresses for each + HA NN (todd) + + HDFS-2720. Fix MiniDFSCluster HA support to work properly on Windows. (Uma + Maheswara Rao G via todd) + + HDFS-2291. Allow the StandbyNode to make checkpoints in an HA setup. (todd) + + HDFS-2709. Appropriately handle error conditions in EditLogTailer (atm via + todd) + + HDFS-2730. Refactor shared HA-related test code into HATestUtil class (todd) + + HDFS-2762. Fix TestCheckpoint timing out on HA branch. (Uma Maheswara Rao G via + todd) + + HDFS-2724. NN web UI can throw NPE after startup, before standby state is + entered. (todd) + + HDFS-2753. Fix standby getting stuck in safemode when blocks are written while + SBN is down. (Hari Mankude and todd via todd) + + HDFS-2773. Reading edit logs from an earlier version should not leave blocks in + under-construction state. (todd) + + HDFS-2775. Fix TestStandbyCheckpoints.testBothNodesInStandbyState failing + intermittently. (todd) + + HDFS-2766. Test for case where standby partially reads log and then performs + checkpoint. (atm) + + HDFS-2738. FSEditLog.selectinputStreams is reading through in-progress streams + even when non-in-progress are requested. (atm) + + HDFS-2789. TestHAAdmin.testFailover is failing (eli) + + HDFS-2747. Entering safe mode after starting SBN can NPE. (Uma Maheswara Rao G + via todd) + + HDFS-2772. On transition to active, standby should not swallow ELIE. (atm) + + HDFS-2767. ConfiguredFailoverProxyProvider should support NameNodeProtocol. + (Uma Maheswara Rao G via todd) + + HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. + (todd) + + HDFS-2592. Balancer support for HA namenodes. (Uma Maheswara Rao G via todd) + + HDFS-2367. Enable the configuration of multiple HA cluster addresses. (atm) + + HDFS-2812. When becoming active, the NN should treat all leases as freshly + renewed. (todd) + + HDFS-2737. Automatically trigger log rolls periodically on the active NN. (todd + and atm) + + HDFS-2820. Add a simple sanity check for HA config (todd) + + HDFS-2688. Add tests for quota tracking in an HA cluster. (todd) + + HDFS-2804. Should not mark blocks under-replicated when exiting safemode (todd) + + HDFS-2807. Service level authorizartion for HAServiceProtocol. (jitendra) + + HDFS-2809. Add test to verify that delegation tokens are honored after + failover. (jitendra and atm) + + HDFS-2838. NPE in FSNamesystem when in safe mode. (Gregory Chanan via eli) + + HDFS-2805. Add a test for a federated cluster with HA NNs. (Brandon Li via + jitendra) + + HDFS-2841. HAAdmin does not work if security is enabled. (atm) + + HDFS-2691. Fixes for pipeline recovery in an HA cluster: report RBW replicas + immediately upon pipeline creation. (todd) + + HDFS-2824. Fix failover when prior NN died just after creating an edit log + segment. (atm via todd) + + HDFS-2853. HA: NN fails to start if the shared edits dir is marked required + (atm via eli) + + HDFS-2845. SBN should not allow browsing of the file system via web UI. (Bikas + Saha via atm) + + HDFS-2742. 
HA: observed dataloss in replication stress test. (todd via eli) + + HDFS-2870. Fix log level for block debug info in processMisReplicatedBlocks + (todd) + + HDFS-2859. LOCAL_ADDRESS_MATCHER.match has NPE when called from + DFSUtil.getSuffixIDs when the host is incorrect (Bikas Saha via todd) + + HDFS-2861. checkpointing should verify that the dfs.http.address has been + configured to a non-loopback for peer NN (todd) + + HDFS-2860. TestDFSRollback#testRollback is failing. (atm) + + HDFS-2769. HA: When HA is enabled with a shared edits dir, that dir should be + marked required. (atm via eli) + + HDFS-2863. Failures observed if dfs.edits.dir and shared.edits.dir have same + directories. (Bikas Saha via atm) + + HDFS-2874. Edit log should log to shared dirs before local dirs. (todd) + + HDFS-2890. DFSUtil#getSuffixIDs should skip unset configurations. (atm) + + HDFS-2792. Make fsck work. (atm) + + HDFS-2808. HA: haadmin should use namenode ids. (eli) + + HDFS-2819. Document new HA-related configs in hdfs-default.xml. (eli) + + HDFS-2752. HA: exit if multiple shared dirs are configured. (eli) + + HDFS-2894. HA: automatically determine the nameservice Id if only one + nameservice is configured. (eli) + + HDFS-2733. Document HA configuration and CLI. (atm) + + HDFS-2794. Active NN may purge edit log files before standby NN has a chance to + read them (todd) + + HDFS-2901. Improvements for SBN web UI - not show under-replicated/missing + blocks. (Brandon Li via jitendra) + + HDFS-2905. HA: Standby NN NPE when shared edits dir is deleted. (Bikas Saha via + jitendra) + + HDFS-2579. Starting delegation token manager during safemode fails. (todd) + + HDFS-2510. Add HA-related metrics. (atm) + + HDFS-2924. Standby checkpointing fails to authenticate in secure cluster. + (todd) + + HDFS-2915. HA: TestFailureOfSharedDir.testFailureOfSharedDir() has race + condition. (Bikas Saha via jitendra) + + HDFS-2912. Namenode not shutting down when shared edits dir is inaccessible. + (Bikas Saha via atm) + + HDFS-2917. HA: haadmin should not work if run by regular user (eli) + + HDFS-2939. TestHAStateTransitions fails on Windows. (Uma Maheswara Rao G via + atm) + + HDFS-2947. On startup NN throws an NPE in the metrics system. (atm) + + HDFS-2942. TestActiveStandbyElectorRealZK fails if build dir does not exist. + (atm) + + HDFS-2948. NN throws NPE during shutdown if it fails to startup (todd) + + HDFS-2909. HA: Inaccessible shared edits dir not getting removed from FSImage + storage dirs upon error. (Bikas Saha via jitendra) + + HDFS-2934. Allow configs to be scoped to all NNs in the nameservice. (todd) + + HDFS-2935. Shared edits dir property should be suffixed with nameservice and + namenodeID (todd) + + HDFS-2928. ConfiguredFailoverProxyProvider should not create a NameNode proxy + with an underlying retry proxy. (Uma Maheswara Rao G via atm) + + HDFS-2955. IllegalStateException during standby startup in getCurSegmentTxId. + (Hari Mankude via atm) + + HDFS-2937. TestDFSHAAdmin needs tests with MiniDFSCluster. (Brandon Li via + suresh) + + HDFS-2586. Add protobuf service and implementation for HAServiceProtocol. + (suresh via atm) + + HDFS-2952. NN should not start with upgrade option or with a pending an + unfinalized upgrade. (atm) + + HDFS-2974. MiniDFSCluster does not delete standby NN name dirs during format. + (atm) + + HDFS-2929. Stress test and fixes for block synchronization (todd) + + HDFS-2972. Small optimization building incremental block report (todd) + + HDFS-2973. 
Re-enable NO_ACK optimization for block deletion. (todd) + + HDFS-2922. HA: close out operation categories (eli) + + HDFS-2993. HA: BackupNode#checkOperation should permit CHECKPOINT operations + (eli) + + HDFS-2904. Client support for getting delegation tokens. (todd) + + HDFS-3013. HA: NameNode format doesn't pick up + dfs.namenode.name.dir.NameServiceId configuration (Mingjie Lai via todd) + + HDFS-3019. Fix silent failure of TestEditLogJournalFailures (todd) + + HDFS-2958. Sweep for remaining proxy construction which doesn't go through + failover path. (atm) + + HDFS-2920. fix remaining TODO items. (atm and todd) + + HDFS-3027. Implement a simple NN health check. (atm) + + HDFS-3023. Optimize entries in edits log for persistBlocks call. (todd) + + HDFS-2979. Balancer should use logical uri for creating failover proxy with HA + enabled. (atm) + + HDFS-3035. Fix failure of TestFileAppendRestart due to OP_UPDATE_BLOCKS (todd) + + HDFS-3039. Address findbugs and javadoc warnings on branch. (todd via atm) + +Release 0.23.10 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + HDFS-5010. Reduce the frequency of getCurrentUser() calls from namenode + (kihwal) + + OPTIMIZATIONS + + BUG FIXES + + HDFS-4998. TestUnderReplicatedBlocks fails intermittently (kihwal) + +Release 0.23.9 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + HDFS-4867. metaSave NPEs when there are invalid blocks in repl queue. + (Plamen Jeliazkov and Ravi Prakash via shv) + + HDFS-4862. SafeModeInfo.isManual() returns true when resources are low even + if it wasn't entered into manually (Ravi Prakash via kihwal) + + HDFS-4832. Namenode doesn't change the number of missing blocks in + safemode when DNs rejoin or leave (Ravi Prakash via kihwal) + + HDFS-4878. On Remove Block, block is not removed from neededReplications + queue. (Tao Luo via shv) + + HDFS-4205. fsck fails with symlinks. (jlowe) + +Release 0.23.8 - 2013-06-05 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + HDFS-4714. Log short messages in Namenode RPC server for exceptions + meant for clients. (kihwal) + + OPTIMIZATIONS + + BUG FIXES + + HDFS-4477. Secondary namenode may retain old tokens (daryn via kihwal) + + HDFS-4699. TestPipelinesFailover#testPipelineRecoveryStress fails + sporadically (Chris Nauroth via kihwal) + + HDFS-4805. Webhdfs client is fragile to token renewal errors + (daryn via kihwal) + + HDFS-3875. Issue handling checksum errors in write pipeline. (kihwal) + + HDFS-4807. createSocketForPipeline() should not include timeout extension + on connect. (Cristina L. Abad via kihwal) + +Release 0.23.7 - 2013-04-18 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + HDFS-4532. RPC call queue may fill due to current user lookup (daryn) + + BUG FIXES + + HDFS-4288. NN accepts incremental BR as IBR in safemode (daryn via kihwal) + + HDFS-4495. Allow client-side lease renewal to be retried beyond soft-limit + (kihwal) + + HDFS-4128. 2NN gets stuck in inconsistent state if edit log replay fails + in the middle (kihwal via daryn) + + HDFS-4542. Webhdfs doesn't support secure proxy users (Daryn Sharp via + kihwal) + + HDFS-4560. Webhdfs cannot use tokens obtained by another user (daryn) + + HDFS-4566. Webdhfs token cancelation should use authentication (daryn) + + HDFS-4567. Webhdfs does not need a token for token operations (daryn via + kihwal) + + HDFS-4577. 
Webhdfs operations should declare if authentication is required + (daryn via kihwal) + + HDFS-3344. Unreliable corrupt blocks counting in TestProcessCorruptBlocks + (kihwal) + + HDFS-3367. WebHDFS doesn't use the logged in user when opening + connections (daryn) + + HDFS-4581. checkDiskError should not be called on network errors (Rohit + Kochar via kihwal) + + HDFS-4649. Webhdfs cannot list large directories (daryn via kihwal) + + HDFS-4548. Webhdfs doesn't renegotiate SPNEGO token (daryn via kihwal) + +Release 0.23.6 - 2013-02-06 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + HDFS-4247. saveNamespace should be tolerant of dangling lease (daryn) + + HDFS-4248. Renaming directories may incorrectly remove the paths in leases + under the tree. (daryn via szetszwo) + + HDFS-4385. Maven RAT plugin is not checking all source files (tgraves) + + HDFS-4426. Secondary namenode shuts down immediately after startup. + (Arpit Agarwal via suresh) + +Release 0.23.5 - 2012-11-28 + + INCOMPATIBLE CHANGES + + HDFS-4080. Add a separate logger for block state change logs to enable turning + off those logs. (Kihwal Lee via suresh) + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + HDFS-4075. Reduce recommissioning overhead (Kihwal Lee via daryn) + + HDFS-3990. NN's health report has severe performance problems (daryn) + + HDFS-4181. LeaseManager tries to double remove and prints extra messages + (Kihwal Lee via daryn) + + BUG FIXES + + HDFS-3829. TestHftpURLTimeouts fails intermittently with JDK7 (Trevor + Robinson via tgraves) + + HDFS-3824. TestHftpDelegationToken fails intermittently with JDK7 (Trevor + Robinson via tgraves) + + HDFS-3224. Bug in check for DN re-registration with different storage ID + (jlowe) + + HDFS-4090. getFileChecksum() result incompatible when called against + zero-byte files. (Kihwal Lee via daryn) + + HDFS-4172. namenode does not URI-encode parameters when building URI for + datanode request (Derek Dagit via bobby) + + HDFS-4182. SecondaryNameNode leaks NameCache entries (bobby) + + HDFS-4186. logSync() is called with the write lock held while releasing + lease (Kihwal Lee via daryn) + +Release 0.23.4 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + HDFS-3831. Failure to renew tokens due to test-sources left in classpath + (jlowe via bobby) + +Release 0.23.3 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + HDFS-2505. Add a test to verify getFileChecksum(..) with ViewFS. (Ravi + Prakash via szetszwo) + + OPTIMIZATIONS + + BUG FIXES + + HDFS-3166. Add timeout to Hftp connections. (Daryn Sharp via szetszwo) + + HDFS-3176. Use MD5MD5CRC32FileChecksum.readFields() in JsonUtil . (Kihwal + Lee via szetszwo) + + HDFS-2652. Add support for host-based delegation tokens. (Daryn Sharp via + szetszwo) + + HDFS-3308. Uses canonical URI to select delegation tokens in HftpFileSystem + and WebHdfsFileSystem. (Daryn Sharp via szetszwo) + + HDFS-3312. In HftpFileSystem, the namenode URI is non-secure but the + delegation tokens have to use secure URI. (Daryn Sharp via szetszwo) + + HDFS-3318. Use BoundedInputStream in ByteRangeInputStream, otherwise, it + hangs on transfers >2 GB. (Daryn Sharp via szetszwo) + + HDFS-3321. Fix safe mode turn off tip message. (Ravi Prakash via szetszwo) + + HDFS-3334. Fix ByteRangeInputStream stream leakage. (Daryn Sharp via + szetszwo) + + HDFS-3331. 
In namenode, check superuser privilege for setBalancerBandwidth + and acquire the write lock for finalizeUpgrade. (szetszwo) + + HDFS-3037. TestMulitipleNNDataBlockScanner#testBlockScannerAfterRestart is + racy. (atm) + + HDFS-3577. In DatanodeWebHdfsMethods, use MessageBodyWriter instead of + StreamingOutput, otherwise, it will fail to transfer large files. + (szetszwo) + + HDFS-3646. LeaseRenewer can hold reference to inactive DFSClient + instances forever. (Kihwal Lee via daryn) + + HDFS-3696. Set chunked streaming mode in WebHdfsFileSystem write operations + to get around a Java library bug causing OutOfMemoryError. (szetszwo) + + HDFS-3553. Hftp proxy tokens are broken (daryn) + + HDFS-3718. Datanode won't shutdown because of runaway DataBlockScanner + thread (Kihwal Lee via daryn) + + HDFS-3861. Deadlock in DFSClient (Kihwal Lee via daryn) + + HDFS-3873. Hftp assumes security is disabled if token fetch fails (daryn) + + HDFS-3852. TestHftpDelegationToken is broken after HADOOP-8225 (daryn) + + HDFS-3890. filecontext mkdirs doesn't apply umask as expected + (Tom Graves via daryn) + +Release 0.23.2 - UNRELEASED + + INCOMPATIBLE CHANGES + + HDFS-2887. FSVolume, is a part of FSDatasetInterface implementation, should + not be referred outside FSDataset. A new FSVolumeInterface is defined. + The BlockVolumeChoosingPolicy.chooseVolume(..) method signature is also + updated. (szetszwo) + + NEW FEATURES + + HDFS-2943. Expose last checkpoint time and transaction stats as JMX + metrics. (atm) + + IMPROVEMENTS + + HDFS-2931. Switch DataNode's BlockVolumeChoosingPolicy to private-audience. + (harsh via szetszwo) + + HDFS-2655. BlockReaderLocal#skip performs unnecessary IO. (Brandon Li + via jitendra) + + HDFS-2725. hdfs script usage information is missing the information + about "dfs" command (Prashant Sharma via stevel) + + HDFS-2907. Add a conf property dfs.datanode.fsdataset.factory to make + FSDataset in Datanode pluggable. (szetszwo) + + HDFS-2985. Improve logging when replicas are marked as corrupt. (todd) + + HDFS-3098. Update and add tests for HADOOP-8173. (Daryn Sharp via szetszwo) + + HDFS-3104. Add tests for HADOOP-8175. (Daryn Sharp via szetszwo) + + HDFS-3066. Cap space usage of default log4j rolling policy. + (Patrick Hunt via eli) + + OPTIMIZATIONS + + HDFS-3024. Improve performance of stringification in addStoredBlock (todd) + + BUG FIXES + HDFS-2923. Namenode IPC handler count uses the wrong configuration key + (todd) + + HDFS-2764. TestBackupNode is racy. (atm) + + HDFS-2869. Fix an error in the webhdfs docs for the mkdir op (harsh) + + HDFS-776. Fix exception handling in Balancer. (Uma Maheswara Rao G + via szetszwo) + + HDFS-2815. Namenode sometimes oes not come out of safemode during + NN crash + restart. (Uma Maheswara Rao via suresh) + + HDFS-2950. Secondary NN HTTPS address should be listed as a + NAMESERVICE_SPECIFIC_KEY. (todd) + + HDFS-2525. Race between BlockPoolSliceScanner and append. (Brandon Li + via jitendra) + + HDFS-2938. Recursive delete of a large directory make namenode + unresponsive. (Hari Mankude via suresh) + + HDFS-2969. ExtendedBlock.equals is incorrectly implemented (todd) + + HDFS-2944. Typo in hdfs-default.xml causes + dfs.client.block.write.replace-datanode-on-failure.enable to be mistakenly + disabled. (atm) + + HDFS-2981. In hdfs-default.xml, the default value of + dfs.client.block.write.replace-datanode-on-failure.enable should be true. + (szetszwo) + + HDFS-3008. Negative caching of local addrs doesn't work. (eli) + + HDFS-3006. 
In WebHDFS, when the return body is empty, set the Content-Type + to application/octet-stream instead of application/json. (szetszwo) + + HDFS-2991. Fix case where OP_ADD would not be logged in append(). (todd) + + HDFS-3012. Exception while renewing delegation token. (Bobby Evans via + jitendra) + + HDFS-3032. Change DFSClient.renewLease() so that it only retries up to the + lease soft-limit. (Kihwal Lee via szetszwo) + + HDFS-2038. Update TestHDFSCLI to handle relative paths with globs. + (Kihwal Lee via szetszwo) + + HDFS-3101. Cannot read empty file using WebHDFS. (szetszwo) + + HDFS-3160. httpfs should exec catalina instead of forking it. + (Roman Shaposhnik via eli) + +Release 0.23.1 - 2012-02-17 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + HDFS-2316. [umbrella] WebHDFS: a complete FileSystem implementation for + accessing HDFS over HTTP (szetszwo) + + HDFS-2594. Support getDelegationTokens and createSymlink in WebHDFS. + (szetszwo) + + HDFS-2545. Change WebHDFS to support multiple namenodes in federation. + (szetszwo) + + HDFS-2178. Contributing Hoop to HDFS, replacement for HDFS proxy + with read/write capabilities. (tucu) + + + IMPROVEMENTS + HDFS-2560. Refactor BPOfferService to be a static inner class (todd) + + HDFS-2544. Hadoop scripts unconditionally source + "$bin"/../libexec/hadoop-config.sh. (Bruno Mahé via tomwhite) + + HDFS-2543. HADOOP_PREFIX cannot be overridden. (Bruno Mahé via tomwhite) + + HDFS-2562. Refactor DN configuration variables out of DataNode class + (todd) + + HDFS-2563. Some cleanup in BPOfferService. (todd) + + HDFS-2568. Use a set to manage child sockets in XceiverServer. + (harsh via eli) + + HDFS-2454. Move maxXceiverCount check to before starting the + thread in dataXceiver. (harsh via eli) + + HDFS-2570. Add descriptions for dfs.*.https.address in hdfs-default.xml. + (eli) + + HDFS-2536. Remove unused imports. (harsh via eli) + + HDFS-2566. Move BPOfferService to be a non-inner class. (todd) + + HDFS-2552. Add Forrest doc for WebHDFS REST API. (szetszwo) + + HDFS-2587. Add apt doc for WebHDFS REST API. (szetszwo) + + HDFS-2604. Add a log message to show if WebHDFS is enabled and a + configuration section in the forrest doc. (szetszwo) + + HDFS-2511. Add dev script to generate HDFS protobufs. (tucu) + + HDFS-2654. Make BlockReaderLocal not extend RemoteBlockReader2. (eli) + + HDFS-2675. Reduce warning verbosity when double-closing edit logs + (todd) + + HDFS-2335. DataNodeCluster and NNStorage always pull fresh entropy. + (Uma Maheswara Rao G via eli) + + HDFS-2574. Remove references to some deprecated properties in conf + templates and defaults files. (Joe Crobak via harsh) + + HDFS-2722. HttpFs should not be using an int for block size. (harsh) + + HDFS-2710. Add HDFS tests related to HADOOP-7933. (Siddarth Seth via + suresh) + + HDFS-2349. Corruption detected during block transfers between DNs + should log a WARN instead of INFO. (harsh) + + HDFS-2729. Update BlockManager's comments regarding the invalid block + set (harsh) + + HDFS-2726. Fix a logging issue under DFSClient's createBlockOutputStream + method (harsh) + + HDFS-554. Use System.arraycopy in BlockInfo.ensureCapacity. (harsh) + + HDFS-1314. Make dfs.blocksize accept size-indicating prefixes. + (Sho Shimauchi via harsh) + + HDFS-69. Improve the 'dfsadmin' commandline help. (harsh) + + HDFS-2788. HdfsServerConstants#DN_KEEPALIVE_TIMEOUT is dead code. (eli) + + HDFS-362. FSEditLog should not writes long and short as UTF8, and should + not use ArrayWritable for writing non-array items. 
(Uma Maheswara Rao G + via szetszwo) + + HDFS-2803. Add logging to LeaseRenewer for better lease expiration debugging. + (Jimmy Xiang via todd) + + HDFS-2817. Combine the two TestSafeMode test suites. (todd) + + HDFS-2818. Fix a missing space issue in HDFS webapps' title tags. (Devaraj K via harsh) + + HDFS-2397. Undeprecate SecondaryNameNode. (eli) + + HDFS-2814. NamenodeMXBean does not account for svn revision in the version + information. (Hitesh Shah via jitendra) + + HDFS-2784. Update hftp and hdfs for host-based token support. + (Kihwal Lee via jitendra) + + HDFS-2785. Update webhdfs and httpfs for host-based token support. + (Robert Joseph Evans via jitendra) + + HDFS-2868. Expose xceiver counts via the DataNode MXBean. (harsh) + + HDFS-2786. Fix host-based token incompatibilities in DFSUtil. (Kihwal + Lee via jitendra) + + HDFS-208. name node should warn if only one dir is listed in dfs.name.dir. + (Uma Maheswara Rao G via eli) + + HDFS-3139. Minor Datanode logging improvement. (eli) + + OPTIMIZATIONS + + HDFS-2130. Switch default checksum to CRC32C. (todd) + + HDFS-2533. Remove needless synchronization on some FSDataSet methods. + (todd) + + HDFS-2129. Simplify BlockReader to not inherit from FSInputChecker. + (todd) + + HDFS-2246. Enable reading a block directly from local file system + for a client on the same node as the block file. (Andrew Purtell, + Suresh Srinivas and Jitendra Nath Pandey via szetszwo) + + HDFS-2825. Add test hook to turn off the writer preferring its local + DN. (todd) + + HDFS-2826. Add test case for HDFS-1476 (safemode can initialize + replication queues before exiting) (todd) + + HDFS-2864. Remove some redundant methods and the constant METADATA_VERSION + from FSDataset. (szetszwo) + + HDFS-2879. Change FSDataset to package private. (szetszwo) + + BUG FIXES + + HDFS-2541. For a sufficiently large value of blocks, the DN Scanner + may request a random number with a negative seed value. (harsh via eli) + + HDFS-2502. hdfs-default.xml should include dfs.name.dir.restore. + (harsh via eli) + + HDFS-2567. When 0 DNs are available, show a proper error when + trying to browse DFS via web UI. (harsh via eli) + + HDFS-2575. DFSTestUtil may create empty files (todd) + + HDFS-2588. hdfs jsp pages missing DOCTYPE. (Dave Vronay via mattf) + + HDFS-2590. Fix the missing links in the WebHDFS forrest doc. (szetszwo) + + HDFS-2596. TestDirectoryScanner doesn't test parallel scans. (eli) + + HDFS-2606. webhdfs client filesystem impl must set the content-type + header for create/append. (tucu) + + HDFS-2614. hadoop dist tarball is missing hdfs headers. (tucu) + + HDFS-2653. DFSClient should cache whether addrs are non-local when + short-circuiting is enabled. (eli) + + HDFS-2649. eclipse:eclipse build fails for hadoop-hdfs-httpfs. + (Jason Lowe via eli) + + HDFS-2640. Javadoc generation hangs. (tomwhite) + + HDFS-2553. Fix BlockPoolSliceScanner spinning in a tight loop (Uma + Maheswara Rao G via todd) + + HDFS-2658. HttpFS introduced 70 javadoc warnings. (tucu) + + HDFS-2706. Use configuration for blockInvalidateLimit if it is set. + (szetszwo) + + HDFS-2646. Hadoop HttpFS introduced 4 findbug warnings. (tucu) + + HDFS-2657. TestHttpFSServer and TestServerWebApp are failing on trunk. (tucu) + + HDFS-2705. HttpFS server should check that upload requests have correct + content-type. (tucu) + + HDFS-2707. HttpFS should read the hadoop-auth secret from a file instead + inline from the configuration. (tucu) + + HDFS-2790. 
FSNamesystem.setTimes throws exception with wrong + configuration name in the message. (Arpit Gupta via eli) + + HDFS-2810. Leases not getting renewed properly by clients (todd) + + HDFS-2751. Datanode may incorrectly drop OS cache behind reads + even for short reads. (todd) + + HDFS-2816. Fix missing license header in httpfs findbugsExcludeFile.xml. + (hitesh via tucu) + + HDFS-2822. processMisReplicatedBlock incorrectly identifies + under-construction blocks as under-replicated. (todd) + + HDFS-442. dfsthroughput in test jar throws NPE (harsh) + + HDFS-2836. HttpFSServer still has 2 javadoc warnings in trunk. + (revans2 via tucu) + + HDFS-2837. mvn javadoc:javadoc not seeing LimitedPrivate class. + (revans2 via tucu) + + HDFS-2840. TestHostnameFilter should work with localhost or + localhost.localdomain (tucu) + + HDFS-2791. If block report races with closing of file, replica is + incorrectly marked corrupt. (todd) + + HDFS-2827. When the parent of a directory is the root, renaming the + directory results in leases updated incorrectly. (Uma Maheswara Rao G + via szetszwo) + + HDFS-2835. Fix findbugs and javadoc issue with GetConf.java. + (suresh) + + HDFS-2889. getNumCurrentReplicas is package private but should be public on + 0.23 (see HDFS-2408). (Gregory Chanan via atm) + + HDFS-2893. The start/stop scripts don't start/stop the 2NN when + using the default configuration. (eli) + +Release 0.23.0 - 2011-11-01 + + INCOMPATIBLE CHANGES + + HDFS-1526. Dfs client name for a map/reduce task should be unique + among threads. (hairong) + + HDFS-1536. Improve HDFS WebUI. (hairong) + + HDFS-2210. Remove hdfsproxy. (eli) + + HDFS-1073. Redesign the NameNode's storage layout for image checkpoints + and edit logs to introduce transaction IDs and be more robust. + Please see HDFS-1073 section below for breakout of individual patches. + + NEW FEATURES + + HDFS-1359. Add BlockPoolID to Block. (suresh) + + HDFS-1365. Federation: propose ClusterID and BlockPoolID format + (Tanping via boryas) + + HDFS-1394. Federation: modify -format option for namenode to generated + new blockpool id and accept newcluster (boryas) + + HDFS-1400. Federation: DataTransferProtocol uses ExtendedBlockPool to + include BlockPoolID in the protocol. (suresh) + + HDFS-1428. Federation : add cluster ID and block pool ID into + Name node web UI(Tanping via boryas) + + HDFS-1450. Federation: Introduce block pool ID into FSDatasetInterface. + (suresh) + + HDFS-1632. Federation: data node storage structure changes and + introduce block pool storage. (Tanping via suresh) + + HDFS-1634. Federation: Convert single threaded DataNode into + per BlockPool thread model.(boryas) + + HDFS-1637. Federation: FSDataset in Datanode should be created after + initial handshake with namenode. (boryas and jitendra) + + HDFS-1653. Federation: Block received message from datanode sends invalid + DatanodeRegistration. (Tanping via suresh) + + HDFS-1645. Federation: DatanodeCommond.Finalize needs to include + BlockPoolId. (suresh) + + HDFS-1638. Federation: DataNode.handleDiskError needs to inform + ALL namenodes if a disk failed (boryas) + + HDFS-1647. Federation: Multiple namenode configuration. (jitendra) + + HDFS-1639. Federation: Add block pool management to FSDataset. (suresh) + + HDFS-1648. Federation: Only DataStorage must be locked using in_use.lock + and no locks must be associated with BlockPoolStorage. (Tanping via suresh) + + HDFS-1641. Federation: Datanode fields that are no longer used should + be removed (boryas) + + HDFS-1642. 
Federation: add Datanode.getDNRegistration(String bpid) + method (boryas) + + HDFS-1643. Federation: remove namenode argument from DataNode + constructor (boryas) + + HDFS-1657. Federation: Tests that corrupt block files fail due to changed + file path in federation. (suresh) + + HDFS-1661. Federation: Remove unnecessary TODO:FEDERATION comments. + (jitendra) + + HDFS-1660. Federation: Datanode doesn't start with two namenodes (boryas) + + HDFS-1650. Federation: TestReplication fails. (Tanping via suresh) + + HDFS-1651. Federation: Tests fail due to null pointer exception in + Datanode#shutdown() method. (Tanping via suresh) + + HDFS-1649. Federation: Datanode command to refresh namenode list at + the datanode. (jitendra) + + HDFS-1646. Federation: MiniDFSCluster#waitActive() waits forever + with the introduction of BPOfferService in datanode. (suresh) + + HDFS-1659. Federation: BPOfferService exits after one iteration + incorrectly. (Tanping via suresh) + + HDFS-1654. Federation: Fix TestDFSUpgrade and TestDFSRollback failures. + (suresh) + + HDFS-1668. Federation: Datanodes send block pool usage information + to the namenode in heartbeat. (suresh) + + HDFS-1669. Federation: Fix TestHftpFileSystem failure. (suresh) + + HDFS-1670. Federation: remove dnRegistration from Datanode (boryas) + + HDFS-1662. Federation: fix unit test case, TestCheckpoint + and TestDataNodeMXBean (tanping via boryas) + + HDFS-1671. Federation: shutdown in DataNode should be able to + shutdown individual BP threads as well as the whole DN (boryas). + + HDFS-1663. Federation: Rename getPoolId() everywhere to + getBlockPoolId() (tanping via boryas) + + HDFS-1652. Federation: Add support for multiple namenodes in + MiniDFSCluster. (suresh) + + HDFS-1672. Federation: refactor stopDatanode(name) to work + with multiple Block Pools (boryas) + + HDFS-1687. Federation: DirectoryScanner changes for + federation (Matt Foley via boryas) + + HDFS-1626. Make BLOCK_INVALIDATE_LIMIT configurable. (szetszwo) + + HDFS-1655. Federation: DataBlockScanner should scan blocks for + all the block pools. (jitendra) + + HDFS-1664. Federation: Add block pool storage usage to Namenode WebUI. + (Tanping via suresh) + + HDFS-1674. Federation: Rename BlockPool class to BlockPoolSlice. + (jghoman, Tanping via suresh) + + HDFS-1673. Federation: Datanode changes to track block token secret per + namenode. (suresh) + + HDFS-1677. Federation: Fix TestFsck and TestListCorruptFileBlocks + failures. (Tanping via suresh) + + HDFS-1678. Federation: Remove unnecessary #getBlockpool() + for NameNodeMXBean in FSNameSystem. (Tanping via Suresh) + + HDFS-1688. Federation: Fix failures in fault injection tests, + TestDiskError, TestDatanodeRestart and TestDFSStartupVersions. (suresh) + + HDFS-1696. Federation: when build version doesn't match - + datanode should wait (keep connecting) until NN comes up + with the right version (boryas) + + HDFS-1681. Balancer: support per pool and per node policies. (szetszwo) + + HDFS-1695. Federation: Fix testOIV and TestDatanodeUtils + (jhoman and tanping via boryas) + + HDFS-1699. Federation: Fix failure of TestBlockReport. + (Matt Foley via suresh) + + HDFS-1698. Federation: TestOverReplicatedBlocks and TestWriteToReplica + failing. (jhoman and jitendra) + + HDFS-1701. Federation: Fix TestHeartbeatHandling. + (Erik Steffl and Tanping Wang via suresh) + + HDFS-1693. Federation: Fix TestDFSStorageStateRecovery failure. (suresh) + + HDFS-1694.
Federation: SimulatedFSDataset changes to work with + federation and multiple block pools. (suresh) + + HDFS-1689. Federation: Configuration for namenodes. (suresh and jitendra) + + HDFS-1682. Change Balancer CLI for multiple namenodes and balancing + policy. (szetszwo) + + HDFS-1697. Federation: fix TestBlockRecovery (boryas) + + HDFS-1702. Federation: fix TestBackupNode and TestRefreshNamenodes + failures. (suresh) + + HDFS-1706. Federation: TestFileAppend2, TestFileAppend3 and + TestBlockTokenWithDFS failing. (jitendra) + + HDFS-1704. Federation: Add a tool that lists namenodes, secondary and + backup from configuration file. (suresh) + + HDFS-1711. Federation: create method for updating machine name in + DataNode.java (boryas) + + HDFS-1712. Federation: when looking up datanode we should use machineName + (in testOverReplicatedBlocks) (boryas) + + HDFS-1709. Federation: Error "nnaddr url param is null" when clicking on a + node from NN Live Node Link. (jitendra) + + HDFS-1714. Federation: refactor upgrade object in DataNode (boryas) + + HDFS-1715. Federation: warning/error not generated when datanode sees + inconsistent/different Cluster ID between namenodes (boryas) + + HDFS-1716. Federation: Add decommission tests for federated namenodes. + (suresh) + + HDFS-1713. Federation: Prevent DataBlockScanner from running in tight loop. + (jitendra) + + HDFS-1721. Federation: Configuration for principal names should not be + namenode specific. (jitendra) + + HDFS-1717. Federation: FSDataset volumeMap access is not synchronized + correctly. (suresh) + + HDFS-1722. Federation: Add flag to MiniDFSCluster to differentiate between + federation and non-federation modes. (boryas via suresh) + + HDFS-1718. Federation: MiniDFSCluster#waitActive() bug causes some tests + to fail. (suresh) + + HDFS-1719. Federation: Fix TestDFSRemove that fails intermittently. + (suresh) + + HDFS-1720. Federation: FSVolumeSet volumes is not synchronized correctly. + (suresh) + + HDFS-1700. Federation: fsck needs to work with federation changes. + (Matt Foley via suresh) + + HDFS-1482. Add listCorruptFileBlocks to DistributedFileSystem. + (Patrick Kling via hairong) + + HDFS-1448. Add a new tool Offline Edits Viewer (oev). (Erik Steffl + via szetszwo) + + HDFS-1735. Federation: merge FSImage change in federation to + FSImage+NNStorage refactoring in trunk. (suresh) + + HDFS-1737. Federation: Update the layout version for federation + changes. (suresh) + + HDFS-1744. Federation: Add new layout version to offline image viewer + and edits viewer. (suresh) + + HDFS-1745. Federation: Fix fault injection test failures. (suresh) + + HDFS-1746. Federation: TestFileAppend3 fails intermittently. (jitendra) + + HDFS-1703. Improve start/stop scripts and add decommission tool for + federation. (Tanping Wang, Erik Steffl via suresh) + + HDFS-1749. Federation: TestListCorruptFileBlocks failing in federation + branch. (jitendra) + + HDFS-1754. Federation: testFsck fails. (boryas) + + HDFS-1755. Federation: The BPOfferService must always connect to namenode + as the login user. (jitendra) + + HDFS-1675. Support transferring RBW between datanodes. (szetszwo) + + HDFS-1791. Federation: Add command to delete block pool directories + from a datanode. (jitendra) + + HDFS-1761.
Add a new DataTransferProtocol operation, Op.TRANSFER_BLOCK, + for transferring RBW/Finalized with acknowledgement and without using RPC. + (szetszwo) + + HDFS-1813. Federation: Authentication using BlockToken in RPC to datanode + fails. (jitendra) + + HDFS-1630. Support fsedits checksum. (hairong) + + HDFS-1606. Provide a stronger data guarantee in the write pipeline by + adding a new datanode when an existing datanode failed. (szetszwo) + + HDFS-1442. Api to get delegation token in Hdfs class. (jitendra) + + HDFS-1070. Speedup namenode image loading and saving by storing only + local file names. (hairong) + + HDFS-1751. Intrinsic limits for HDFS files, directories (daryn via boryas). + + HDFS-1873. Federation: Add cluster management web console. + (Tanping Wang via suresh) + + HDFS-1911 HDFS tests for the newly added viewfs + + HDFS-1814. Add "hdfs groups" command to query the server-side groups + resolved for a user. (Aaron T. Myers via todd) + + HDFS-1914. Federation: namenode storage directories must be configurable + specific to name service. (suresh) + + HDFS-1963. Create RPM and Debian packages for HDFS. Changes deployment + layout to be consistent across the binary tgz, rpm, and deb. + (Eric Yang via omalley) + + HDFS-2058. Change Data Transfer wire protocol to use protocol buffers. + (todd) + + HDFS-2055. Add hflush support to libhdfs. (Travis Crawford via eli) + + HDFS-2083. Query JMX statistics over http via JMXJsonServlet. (tanping) + + HDFS-2156. Make hdfs and mapreduce rpm only depend on the same major + version for common and hdfs. (eyang via omalley) + + HDFS-2202. Add a new DFSAdmin command to set balancer bandwidth of + datanodes without restarting. (Eric Payne via szetszwo) + + HDFS-2284. Add a new FileSystem, webhdfs://, for supporting write Http + access to HDFS. (szetszwo) + + HDFS-2317. Support read access to HDFS in WebHDFS. (szetszwo) + + HDFS-2338. Add configuration option to enable/disable WebHDFS. + (jitendra via szetszwo) + + HDFS-2318. Provide authentication to WebHDFS using SPNEGO and delegation + tokens. (szetszwo) + + HDFS-2340. Support getFileBlockLocations and getDelegationToken in WebHDFS. + (szetszwo) + + HDFS-2348. Support getContentSummary and getFileChecksum in WebHDFS. + (szetszwo) + + HDFS-2385. Support renew and cancel delegation tokens in WebHDFS. + (szetszwo) + + HDFS-2539. Support doAs and GETHOMEDIRECTORY in WebHDFS. + (szetszwo) + + IMPROVEMENTS + + HDFS-1875. MiniDFSCluster hard-codes dfs.datanode.address to localhost + (Eric Payne via mattf) + + HDFS-2019. Fix all the places where Java method File.list is used with + FileUtil.list API (Bharath Mundlapudi via mattf) + + HDFS-1934. Fix NullPointerException when certain File APIs return null + (Bharath Mundlapudi via mattf) + + HDFS-1510. Added test-patch.properties required by test-patch.sh (nigel) + + HDFS-1628. Display full path in AccessControlException. (John George + via szetszwo) + + HDFS-1707. Federation: Failure in browsing data on new namenodes. + (jitendra) + + HDFS-1683. Test Balancer with multiple NameNodes. (szetszwo) + + HDFS-1547. Improve decommission mechanism. (suresh) + + HDFS-2143. Federation: In cluster web console, add link to namenode page + that displays live and dead datanodes. (Ravi Prakash via suresh) + + HDFS-1588. Remove hardcoded strings for configuration keys, "dfs.hosts" + and "dfs.hosts.exlude". (Erik Steffl via suresh) + + HDFS-1481. NameNode should validate fsimage before rolling. (hairong) + + HDFS-1506. Refactor fsimage loading code. 
(hairong) + + HDFS-1533. A more elegant FileSystem#listCorruptFileBlocks API + (HDFS portion) (Patrick Kling via hairong) + + HDFS-1476. listCorruptFileBlocks should be functional while the + name node is in safe mode. (Patrick Kling via hairong) + + HDFS-1534. Fix some incorrect logs in FSDirectory. (eli) + + HDFS-1539. A config option for the datanode to fsycn a block file + when block is completely written. (dhruba) + + HDFS-1335. HDFS side change of HADDOP-6904: RPC compatibility. (hairong) + + HDFS-1557. Separate Storage from FSImage. (Ivan Kelly via jitendra) + + HDFS-560 Enhancements/tuning to hadoop-hdfs/build.xml + + HDFS-1629. Add a method to BlockPlacementPolicy for keeping the chosen + nodes in the output array. (szetszwo) + + HDFS-1731. Allow using a file to exclude certain tests from build (todd) + + HDFS-1736. Remove the dependency from DatanodeJspHelper to FsShell. + (Daryn Sharp via szetszwo) + + HDFS-780. Revive TestFuseDFS. (eli) + + HDFS-1445. Batch the calls in DataStorage to FileUtil.createHardLink(). + (Matt Foley via jghoman) + + HDFS-1763. Replace hard-coded option strings with variables from + DFSConfigKeys. (eli) + + HDFS-1541. Not marking datanodes dead when namenode in safemode. + (hairong) + + HDFS-1120. Make DataNode's block-to-device placement policy pluggable + (Harsh J Chouraria via todd) + + HDFS-1785. In BlockReceiver and DataXceiver, clientName.length() is used + multiple times for determining whether the source is a client or a + datanode. (szetszwo) + + HDFS-1789. Refactor frequently used codes from DFSOutputStream and + DataXceiver. (szetszwo) + + HDFS-1767. Namenode ignores non-initial block report from datanodes + when in safemode during startup. (Matt Foley via suresh) + + HDFS-1817. Move pipeline_Fi_[39-51] from TestFiDataTransferProtocol + to TestFiPipelineClose. (szetszwo) + + HDFS-1760. In FSDirectory.getFullPathName(..), it is better to return "/" + for root directory instead of an empty string. (Daryn Sharp via szetszwo) + + HDFS-1833. Reduce repeated string constructions and unnecessary fields, + and fix comments in BlockReceiver.PacketResponder. (szetszwo) + + HDFS-1486. Generalize CLITest structure and interfaces to faciliate + upstream adoption (e.g. for web testing). (cos) + + HDFS-1844. Move "fs -help" shell command tests from HDFS to COMMOM; see + also HADOOP-7230. (Daryn Sharp via szetszwo) + + HDFS-1840. In DFSClient, terminate the lease renewing thread when all files + being written are closed for a grace period, and start a new thread when + new files are opened for write. (szetszwo) + + HDFS-1854. make failure message more useful in + DFSTestUtil.waitReplication(). (Matt Foley via eli) + + HDFS-1562. Add rack policy tests. (eli) + + HDFS-1856. TestDatanodeBlockScanner waits forever, errs without giving + information. (Matt Foley via eli) + + HDFS-1295. Improve namenode restart times by short-circuiting the + first block reports from datanodes. (Matt Foley via suresh) + Corrected merge error in DataNode.java. (Matt Foley) + + HDFS-1843. Discover file not found early for file append. + (Bharath Mundlapudi via jitendra) + + HDFS-1862. Improve test reliability of HDFS-1594. (Aaron T. Myers via eli) + + HDFS-1846. Preallocate edit log with OP_INVALID instead of zero bytes + to ensure blocks are actually allocated. (Aaron T. Myers via todd) + + HDFS-1741. Provide a minimal pom file to allow integration of HDFS into Sonar + analysis (cos) + + HDFS-1870. Move and rename DFSClient.LeaseChecker to a seperated class + LeaseRenewer. 
(szetszwo) + + HDFS-1866. Document dfs.datanode.max.transfer.threads in hdfs-default.xml + (Harsh J Chouraria via todd) + + HDFS-1890. Improve the name, class and value type of the map + LeaseRenewer.pendingCreates. (szetszwo) + + HDFS-1865. Share LeaseRenewer among DFSClients so that there is only a + LeaseRenewer thread per namenode per user. (szetszwo) + + HDFS-1906. Remove logging exception stack trace in client logs when one of + the datanode targets to read from is not reachable. (suresh) + + HDFS-1378. Edit log replay should track and report file offsets in case of + errors. (Aaron T. Myers and Todd Lipcon via todd) + + HDFS-1917. Separate hdfs jars from common in ivy configuration. (Eric Yang + via szetszwo) + + HDFS-1899. GenericTestUtils.formatNamenode should be moved to DFSTestUtil + (Ted Yu via todd) + + HDFS-1117. Metrics 2.0 HDFS instrumentation. (Luke Lu via suresh) + + HDFS-1946. HDFS part of HADOOP-7291. (eli) + + HDFS-1945. Removed the deprecated fields in DataTransferProtocol. + (szetszwo) + + HDFS-1730. Use DaemonFactory from common and delete it from HDFS. + (Tanping via suresh) + + HDFS-1573. Add useful tracing information to Lease Renewer thread names + (todd) + + HDFS-1939. In ivy.xml, test conf should not extend common conf. + (Eric Yang via szetszwo) + + HDFS-1332 Include more information in exceptions and debug messages + when BlockPlacementPolicy cannot be satisfied. (Ted Yu via szetszwo) + + HDFS-1958. Confirmation should be more lenient of user input when + formatting the NameNode. (todd) + + HDFS-1905. Improve namenode -format command by not making -clusterId + parameter mandatory. (Bharath Mundlapudi via suresh) + + HDFS-1877. Add a new test for concurrent read and write. (CW Chung + via szetszwo) + + HDFS-1959. Better error message for missing namenode directory. (eli) + + HDFS-1996. ivy: hdfs test jar should be independent to common test jar. + (Eric Yang via szetszwo) + + HDFS-1812. TestHDFSCLI should clean up cluster in teardown method. + (Uma Maheswara Rao G via todd) + + HDFS-1884. Improve TestDFSStorageStateRecovery to properly throw in the + case of errors. (Aaron T. Myers via todd) + + HDFS-1727. fsck command should display command usage if user passes any + illegal argument. (Sravan Kumar via todd) + + HDFS-1636. If dfs.name.dir points to an empty dir, namenode format + shouldn't require confirmation. (Harsh J Chouraria via todd) + + HDFS-1966. Encapsulate individual DataTransferProtocol op headers. + (szetszwo) + + HDFS-2024. Format TestWriteRead source codes. (CW Chung via szetszwo) + + HDFS-1968. Enhance TestWriteRead to support position/sequential read, + append, truncate and verbose options. (CW Chung via szetszwo) + + HDFS-1986. Add option to get http/https address from + DFSUtil#getInfoServer(). (Tanping via suresh) + + HDFS-2029. In TestWriteRead, check visible length immediately after + openning the file and fix code style. (John George via szetszwo) + + HDFS-2040. Only build libhdfs if a flag is passed. (eli) + + HDFS-1586. Add InterfaceAudience and InterfaceStability annotations to + MiniDFSCluster. (suresh) + + HDFS-2003. Separate FSEditLog reading logic from edit log memory state + building logic. (Ivan Kelly via todd) + + HDFS-2066. Create a package and individual class files for + DataTransferProtocol. (szetszwo) + + HADOOP-7106. Reorganize project SVN layout to "unsplit" the projects. + (todd, nigel) + + HDFS-2046. Force entropy to come from non-true random for tests. (todd) + + HDFS-2073. Add @Override annotation to NameNode. 
(suresh) + + HDFS-420. Fuse-dfs should cache fs handles. (Brian Bockelman and eli) + + HDFS-1568. Improve the log messages in DataXceiver. (Joey Echeverria via + szetszwo) + + HDFS-2100. Improve TestStorageRestore. (atm) + + HDFS-2092. Remove some object references to Configuration in DFSClient. + (Bharath Mundlapudi via szetszwo) + + HDFS-2087. Declare methods in DataTransferProtocol interface, and change + Sender and Receiver to implement the interface. (szetszwo) + + HDFS-1723. quota errors messages should use the same scale. (Jim Plush via + atm) + + HDFS-2110. StreamFile and ByteRangeInputStream cleanup. (eli) + + HDFS-2107. Move block management code from o.a.h.h.s.namenode to a new + package o.a.h.h.s.blockmanagement. (szetszwo) + + HDFS-2109. Store uMask as member variable to DFSClient.Conf. (Bharath + Mundlapudi via szetszwo) + + HDFS-2111. Add tests for ensuring that the DN will start with a few bad + data directories. (Harsh J Chouraria via todd) + + HDFS-2134. Move DecommissionManager to the blockmanagement package. + (szetszwo) + + HDFS-1977. Stop using StringUtils.stringifyException(). + (Bharath Mundlapudi via jitendra) + + HDFS-2131. Add new tests for the -overwrite/-f option in put and + copyFromLocal by HADOOP-7361. (Uma Maheswara Rao G via szetszwo) + + HDFS-2140. Move Host2NodesMap to the blockmanagement package. (szetszwo) + + HDFS-2154. In TestDFSShell, use TEST_ROOT_DIR and fix some deprecated + warnings. (szetszwo) + + HDFS-2153. Move DFSClientAdapter to test and fix some javac warnings in + OfflineEditsViewerHelper. (szetszwo) + + HDFS-2159. Deprecate DistributedFileSystem.getClient() and fixed the + deprecated warnings in DFSAdmin. (szetszwo) + + HDFS-2157. Improve header comment in o.a.h.hdfs.server.namenode.NameNode. + (atm via eli) + + HDFS-2147. Move cluster network topology to block management and fix some + javac warnings. (szetszwo) + + HDFS-2141. Remove NameNode roles Active and Standby (they become + states of the namenode). (suresh) + + HDFS-2161. Move createNamenode(..), createClientDatanodeProtocolProxy(..) + and Random object creation to DFSUtil; move DFSClient.stringifyToken(..) + to DelegationTokenIdentifier. (szetszwo) + + HDFS-1774. Small optimization to FSDataset. (Uma Maheswara Rao G via eli) + + HDFS-2167. Move dnsToSwitchMapping and hostsReader from FSNamesystem to + DatanodeManager. (szetszwo) + + HDFS-2116. Use Mokito in TestStreamFile and TestByteRangeInputStream. + (Plamen Jeliazkov via shv) + + HDFS-2112. Move ReplicationMonitor to block management. (Uma Maheswara + Rao G via szetszwo) + + HDFS-1739. Add available volume size to the error message when datanode + throws DiskOutOfSpaceException. (Uma Maheswara Rao G via szetszwo) + + HDFS-2144. If SNN shuts down during initialization it does not log the + cause. (Ravi Prakash via atm) + + HDFS-2180. Refactor NameNode HTTP server into new class. (todd) + + HDFS-2198. Remove hardcoded configuration keys. (suresh) + + HDFS-2149. Move EditLogOp serialization formats into FsEditLogOp + implementations. (Ivan Kelly via todd) + + HDFS-2191. Move datanodeMap from FSNamesystem to DatanodeManager. + (szetszwo) + + HDFS-2200. Change FSNamesystem.LOG to package private. (szetszwo) + + HDFS-2195. Refactor StorageDirectory to not be an non-static inner class. + (todd via eli) + + HDFS-2212. Refactor double-buffering code out of EditLogOutputStreams. + (todd via eli) + + HDFS-2199. Move blockTokenSecretManager from FSNamesystem to BlockManager. + (Uma Maheswara Rao G via szetszwo) + + HDFS-2187. 
Make EditLogInputStream act like an iterator over FSEditLogOps + (Ivan Kelly and todd via todd) + + HDFS-2225. Refactor edit log file management so it's not in classes + which should be generic to the type of edit log storage. (Ivan Kelly + via todd) + + HDFS-2108. Move datanode heartbeat handling from namenode package to + blockmanagement package. (szetszwo) + + HDFS-2226. Clean up counting of operations in FSEditLogLoader (todd) + + HDFS-2228. Move block and datanode code from FSNamesystem to + BlockManager and DatanodeManager. (szetszwo) + + HDFS-2238. In NamenodeFsck.toString(), uses StringBuilder.(..) instead of + string concatenation. (Uma Maheswara Rao G via szetszwo) + + HDFS-2230. ivy to resolve/retrieve latest common-tests jar published by + hadoop common maven build. (gkesavan) + + HDFS-2227. getRemoteEditLogManifest should pull its information from + FileJournalManager during checkpoint process (Ivan Kelly and Todd Lipcon + via todd) + + HDFS-2239. Reduce access levels of the fields and methods in FSNamesystem. + (szetszwo) + + HDFS-2241. Remove implementing FSConstants interface to just get the + constants from the interface. (suresh) + + HDFS-2237. Change UnderReplicatedBlocks from public to package private. + (szetszwo) + + HDFS-2233. Add WebUI tests with URI reserved chars. (eli) + + HDFS-2265. Remove unnecessary BlockTokenSecretManager fields/methods from + BlockManager. (szetszwo) + + HDFS-2260. Refactor BlockReader into an interface and implementation. + (todd) + + HDFS-2096. Mavenization of hadoop-hdfs (Alejandro Abdelnur via tomwhite) + + HDFS-2273. Refactor BlockManager.recentInvalidateSets to a new class. + (szetszwo) + + HDFS-2266. Add Namesystem and SafeMode interfaces to avoid directly + referring to FSNamesystem in BlockManager. (szetszwo) + + HDFS-1217. Change some NameNode methods from public to package private. + (Laxman via szetszwo) + + HDFS-1620. Rename HdfsConstants -> HdfsServerConstants, FSConstants -> + HdfsConstants. (Harsh J Chouraria via atm) + + HDFS-2197. Refactor RPC call implementations out of NameNode class (todd) + + HDFS-2332. Add test for HADOOP-7629 (using an immutable FsPermission + object as an RPC parameter fails). (todd) + + HDFS-2363. Move datanodes size printing from FSNamesystem.metasave(..) + to BlockManager. (Uma Maheswara Rao G via szetszwo) + + HDFS-2209. Make MiniDFS easier to embed in other apps. (stevel) + + HDFS-2205. Log message for failed connection to datanode is not + followed by a success message. (Ravi Prakash via stevel) + + HDFS-2401. Running a set of methods in a Single Test Class. + (Jonathan Eagles via mahadev) + + HDFS-2471. Add federation documentation. (suresh) + + HDFS-2485. Improve code layout and constants in UnderReplicatedBlocks + (stevel) + + HDFS-2356. Support case insensitive query parameter names in WebHDFS. + (szetszwo) + + HDFS-2368. Move SPNEGO conf properties from hdfs-default.xml to + hdfs-site.xml. (szetszwo) + + HDFS-2395. Add a root element in the JSON responses of WebHDFS. + (szetszwo) + + HDFS-2427. Change the default permission in WebHDFS to 755 and add range + check/validation for all parameters. (szetszwo) + + HDFS-2501. Add version prefix and root methods to WebHDFS. (szetszwo) + + HDFS-1869. mkdirs should use the supplied permission for all of the created + directories. (Daryn Sharp via szetszwo) + + HDFS-2355. Federation: enable using the same configuration file across + all the nodes in the cluster. (suresh) + + HDFS-2371. Refactor BlockSender.java for better readability. 
(suresh) + + HDFS-2493. Remove reference to FSNamesystem in blockmanagement classes. + (szetszwo) + + HDFS-2294. Download of commons-daemon TAR should not be under target (tucu) + + HDFS-2322. the build fails in Windows because commons-daemon TAR cannot be + fetched. (tucu) + + HDFS-2436. Change FSNamesystem.setTimes(..) for allowing setting times on + directories. (Uma Maheswara Rao G via szetszwo) + + HDFS-2512. Add textual error message to data transfer protocol responses + (todd) + + HDFS-2521. Remove custom checksum headers from data transfer protocol + (todd) + + HDFS-2308. NamenodeProtocol.endCheckpoint is vestigial and can be removed. + (eli) + + HDFS-2507. Allow saveNamespace operations to be canceled. (todd) + + OPTIMIZATIONS + + HDFS-1458. Improve checkpoint performance by avoiding unnecessary image + downloads and loading. (hairong) + + HDFS-1601. Pipeline ACKs are sent as lots of tiny TCP packets (todd) + + HDFS-1826. NameNode should save image to name directories in parallel + during upgrade. (Matt Foley via hairong) + + HDFS-2030. Improve usability of namenode -upgrade command. + (Bharath Mundlapudi via suresh) + + HDFS-2056. Update fetchdt usage. (Tanping Wang via jitendra) + + HDFS-2118. Couple dfs data dir improvements. (eli) + + HDFS-2500. Avoid file system operations in BPOfferService thread while + processing deletes. (todd) + + HDFS-2465. Add HDFS support for fadvise readahead and drop-behind. (todd) + + BUG FIXES + + HDFS-2344. Fix the TestOfflineEditsViewer test failure in 0.23 branch. + (Uma Maheswara Rao G via mattf) + + HDFS-2347. Fix checkpointTxnCount's comment about editlog size. + (Uma Maheswara Rao G via mattf) + + HDFS-2011. Removal and restoration of storage directories on checkpointing + failure doesn't work properly. (Ravi Prakash via mattf) + + HDFS-1955. FSImage.doUpgrade() was made too fault-tolerant by HDFS-1826. + (mattf) + + HDFS-2061. Two minor bugs in BlockManager block report processing. (mattf) + + HDFS-1449. Fix test failures - ExtendedBlock must return + block file name in #getBlockName(). (suresh) + + HDFS-1680. Fix TestBalancer. (szetszwo) + + HDFS-1705. Balancer command throws NullPointerException. (suresh via + szetszwo) + + HDFS-1559. Add missing UGM overrides to TestRefreshUserMappings + (Todd Lipcon via eli) + + HDFS-1585. Fix build after HDFS-1547 (todd) + + HDFS-1684. Balancer cannot start with with multiple namenodes. (szetszwo) + + HDFS-1516. mvn-install is broken after 0.22 branch creation. (cos) + + HDFS-1360. TestBlockRecovery should bind ephemeral ports. + (Todd Lipcon via hairong) + + HDFS-1551. Fix pom templates dependency list (gkesavan) + + HDFS-1509. A savenamespace command writes the fsimage and edits into + all configured directories. (dhruba) + + HDFS-1540. Make Datanode handle errors from RPC calls to namenode + more elegantly. (dhruba) + + HDFS-1463. Accesstime of a file is not updated in safeMode. (dhruba) + + HDFS-863. Potential deadlock in TestOverReplicatedBlocks. + (Ken Goodhope via jghoman) + + HDFS-1607. Fix referenced to misspelled method name getProtocolSigature + (todd) + + HDFS-1610. Fix TestClientProtocolWithDelegationToken and TestBlockToken + on trunk after HADOOP-6904 (todd) + + HDFS-1600. Fix release audit warnings on trunk. (todd) + + HDFS-1691. Remove a duplicated static initializer for reading default + configurations in DFSck. (Alexey Diomin via szetszwo) + + HDFS-1748. Balancer utilization classification is incomplete. (szetszwo) + + HDFS-1738. 
change hdfs jmxget to return an empty string instead of + null when an attribute value is not available (tanping vi boryas) + + HDFS-1757. Don't compile fuse-dfs by default. (eli) + + HDFS-1770. TestFiRename fails due to invalid block size. (eli) + + HDFS-1797. Fix new findbugs warning introduced by HDFS-1120 (todd) + + HDFS-1611. Fix up some log messages in DFSClient and MBean registration + (Uma Maheswara Rao G via todd) + + HDFS-1543. Reduce dev. cycle time by moving system testing artifacts from + default build and push to maven for HDFS (Luke Lu via cos) + + HDFS-1818. TestHDFSCLI is failing on trunk after HADOOP-7202. + (Aaron T. Myers via todd) + + HDFS-1828. TestBlocksWithNotEnoughRacks intermittently fails assert. + (Matt Foley via eli) + + HDFS-1824. delay instantiation of file system object until it is + needed (linked to HADOOP-7207) (boryas) + + HDFS-1831. Fix append bug in FileContext and implement CreateFlag + check (related to HADOOP-7223). (suresh) + + HDFS-1594. When the disk becomes full Namenode is getting shutdown and + not able to recover. (Aaron T. Myers via eli) + + HDFS-1822. Handle editlog opcode conflict with 0.20.203 during upgrade, + by throwing an error to indicate the editlog needs to be empty. + (suresh) + + HDFS-1808. TestBalancer waits forever, errs without giving information. + (Matt Foley via eli) + + HDFS-1829. TestNodeCount waits forever, errs without giving information. + (Matt Foley via eli) + + HDFS-1860. when renewing/canceling DelegationToken over http we need to + pass exception information back to the caller.(boryas) + + HDFS-1871. Mapreduce build fails due to MiniDFSCluster change from + HDFS-1052. (suresh) + + HDFS-1876. One MiniDFSCluster constructor ignores numDataNodes parameter + (todd) + + HDFS-1773. Do not show decommissioned datanodes, which are not in both + include and exclude lists, on web and JMX interfaces. + (Tanping Wang via szetszwo) + + HDFS-1888. MiniDFSCluster#corruptBlockOnDatanodes() access must be + public. (suresh) + + HDFS-1889. incorrect path in start/stop dfs script. (John George via eli) + + HDFS-1891. Disable IPV6 for junit tests to fix TestBackupNode failure. + (suresh) + + HDFS-1898. Tests failing on trunk due to use of NameNode.format. + (todd via eli) + + HDFS-1902. Fix setrep path display for TestHDFSCLI. (Daryn Sharp + via szetszwo) + + HDFS-1827. Fix timeout problem in TestBlockReplacement. (Matt Foley + via szetszwo) + + HDFS-1908. Fix a NullPointerException in fi.DataTransferTestUtil. + (szetszwo) + + HDFS-1912. Update tests for FsShell standardized error messages. + (Daryn Sharp via szetszwo) + + HDFS-1903. Fix path display for rm/rmr in TestHDFSCLI and TestDFSShell. + (Daryn Sharp via szetszwo) + + HDFS-1627. Fix NullPointerException in Secondary NameNode. (hairong) + + HDFS-1928. Fix path display for touchz in TestHDFSCLI. + (Daryn Sharp via todd) + + HDFS-1938. Fix ivy-retrieve-hdfs dependence in build.xml and aop.xml. + (Eric Yang via szetszwo) + + HDFS-1929. TestEditLogFileOutputStream fails if running on same host as NN + (Aaron T. Myers via todd) + + HDFS-1933. Update TestDFSShell for improved "test" shell command. (Daryn + Sharp via todd) + + HDFS-1931. Update TestDFSShell for improved "du" shell command. (Daryn + Sharp via todd) + + HDFS-1439. HDFS Federation: Fix compilation error in TestFiHftp. (suresh) + + HDFS-1881. Federation: after taking snapshot the current directory + of datanode is empty. (Tanping Wang via suresh) + + HDFS-1927. Fix a bug which causes ip=null in NameNode audit log. 
+ (John George via szetszwo) + + HDFS-1953. Federation: Change name node mxbean name in cluster web + console. (Tanping Wang via suresh) + + HDFS-1922. Fix recurring failure of TestJMXGet (Luke Lu via todd) + + HDFS-1371. One bad node can incorrectly flag many files as corrupt. + (Tanping Wang via jitendra) + + HDFS-1943. Fail to start datanode while start-dfs.sh is executed by + root user. (Wei Yongjun via jghoman) + + HDFS-1983. Fix path display for copy and rm commands in TestHDFSCLI and + TestDFSShell. (Daryn Sharp via todd) + + HDFS-1999. Tests use deprecated configs. (Aaron T. Myers via eli) + + HDFS-1592. Datanode startup doesn't honor volumes.tolerated. + (Bharath Mundlapudi via jitendra) + + HDFS-1920. libhdfs does not build for ARM processors. + (Trevor Robinson via eli) + + HDFS-1936. Layout version change from HDFS-1822 causes upgrade failure. + (suresh) + + HDFS-2021. Update numBytesAcked before sending the ack in PacketResponder. + (John George via szetszwo) + + HDFS-2020. Fix TestDFSUpgradeFromImage by removing the use of DataNode + as a singleton. (suresh via todd) + + HDFS-2022. ant binary should build libhdfs. (Eric Yang via eli) + + HDFS-2014. Change HDFS scripts to work in developer enviroment post + RPM packaging changes. (Eric Yang via suresh) + + HDFS-1995. Federation: Minor bug fixes and modification cluster web UI. + (Tanping Wang via suresh) + + HDFS-1907. Fix position read for reading still-being-written file in + DFSInputStream. (John George via szetszwo) + + HDFS-1923. In TestFiDataTransferProtocol2, reduce random sleep time period + and increase the number of datanodes. (szetszwo) + + HDFS-1149. Lease reassignment should be persisted to the edit log. + (Aaron T. Myers via todd) + + HDFS-1998. Federation: Make refresh-namenodes.sh refresh all the + namenode. (Tanping Wang via suresh) + + HDFS-2041. OP_CONCAT_DELETE doesn't properly restore modification time + of the concatenated file when edit logs are replayed. (todd) + + HDFS-2063. libhdfs test is broken. (Eric Yang via eli) + + HDFS-2067. Bump DATA_TRANSFER_VERSION constant in trunk after introduction + of protocol buffers in the protocol. (szetszwo via todd) + + HDFS-2069. Incorrect default trash interval value in the docs. + (Harsh J Chouraria via eli) + + HDFS-1942. Datanode must exist when all the block pool service threads + exit. (Bharath Mundlapudi via suresh) + + HDFS-1656. Fixes an issue to do with fetching of delegation tokens in + HftpFileSystem. Contributed by Kan Zhang. + + HDFS-1692. In secure mode, Datanode process doesn't exit when disks + fail. (Bharath Mundlapudi via suresh) + + HDFS-1734. 'Chunk size to view' option is not working in Name Node UI. + (Uma Maheswara Rao G via jitendra) + + HDFS-2086. If the include hosts list contains host names, after restarting + namenode, data nodes registration is denied. Contributed by Tanping Wang. + + HDFS-2082. SecondaryNameNode web interface doesn't show the right info. (atm) + + HDFS-1321. If service port and main port are the same, there is no clear + log message explaining the issue. (Jim Plush via atm) + + HDFS-1381. HDFS javadocs hard-code references to dfs.namenode.name.dir and + dfs.datanode.data.dir parameters (Jim Plush via atm) + + HDFS-2053. Bug in INodeDirectory#computeContentSummary warning. + (Michael Noll via eli) + + HDFS-1990. Fix resource leaks in BlockReceiver.close(). (Uma Maheswara + Rao G via szetszwo) + + HDFS-2034. Length in DFSInputStream.getBlockRange(..) becomes -ve when + reading only from a currently being written block. 
(John George via + szetszwo) + + HDFS-2132. Potential resource leak in EditLogFileOutputStream.close. (atm) + + HDFS-2120. on reconnect, DN can connect to NN even with different source + versions. (John George via atm) + + HDFS-2152. TestWriteConfigurationToDFS causing the random failures. (Uma + Maheswara Rao G via atm) + + HDFS-2114. re-commission of a decommissioned node does not delete + excess replicas. (John George via mattf) + + HDFS-1776. Bug in Concat code. (Bharath Mundlapudi via Dmytro Molkov) + + HDFS-2196. Make ant build system work with hadoop-common JAR generated + by Maven. (Alejandro Abdelnur via tomwhite) + + HDFS-2245. Fix a NullPointerException in BlockManager.chooseTarget(..). + (szetszwo) + + HDFS-2229. Fix a deadlock in namenode by enforcing lock acquisition + ordering. (szetszwo) + + HDFS-2235. Encode servlet paths. (eli) + + HDFS-2186. DN volume failures on startup are not counted. (eli) + + HDFS-2240. Fix a deadlock in LeaseRenewer by enforcing lock acquisition + ordering. (szetszwo) + + HDFS-73. DFSOutputStream does not close all the sockets. + (Uma Maheswara Rao G via eli) + + HDFS-1257. Fix a race condition on BlockManager.recentInvalidateSets. + (Eric Payne via szetszwo) + + HDFS-2267. DataXceiver thread name incorrect while waiting on op during + keepalive. (todd) + + HDFS-1480. All replicas of a block can end up on the same rack when + some datanodes are decommissioning. (todd) + + HDFS-2286. DataXceiverServer logs AsynchronousCloseException at shutdown + (todd) + + HDFS-2289. Ensure jsvc is bundled with the HDFS distribution artifact. + (Alejandro Abdelnur via acmurthy) + + HDFS-2314. MRV1 test compilation broken after HDFS-2197 (todd) + + HDFS-2323. start-dfs.sh script fails for tarball install (tomwhite) + + HDFS-2346. TestHost2NodesMap & TestReplicasMap will fail depending upon + execution order of test methods (Laxman via atm) + + HDFS-2345. TestLeaseRecovery2 fails on 0.23 branch (Uma Maheswara Rao G + via atm) + + HDFS-2412. Add backwards-compatibility layer for renamed FSConstants + class (todd) + + HDFS-2414. Fix TestDFSRollback to avoid spurious failures. (todd) + + HDFS-2422. The NN should tolerate the same number of low-resource volumes + as failed volumes (atm) + + HDFS-2467. HftpFileSystem uses incorrect compare for finding delegation + tokens. (omalley) + + HDFS-2331. Fix WebHdfsFileSystem compilation problems for a bug in JDK + version < 1.6.0_26. (Abhijit Suresh Shingate via szetszwo) + + HDFS-2333. Change DFSOutputStream back to package private, otherwise, + there are two SC_START_IN_CTOR findbugs warnings. (szetszwo) + + HDFS-2366. Initialize WebHdfsFileSystem.ugi in object construction. + (szetszwo) + + HDFS-2361. hftp is broken, fixed username checks in JspHelper. (jitendra) + + HDFS-2403. NamenodeWebHdfsMethods.generateDelegationToken(..) does not use + the renewer parameter. (szetszwo) + + HDFS-2409. _HOST in dfs.web.authentication.kerberos.principal. (jitendra) + + HDFS-2404. WebHDFS liststatus json response is not correct. (suresh) + + HDFS-2441. Remove the Content-Type set by HttpServer.QuotingInputFilter in + WebHDFS responses. (szetszwo) + + HDFS-2428. Convert com.sun.jersey.api.ParamException$QueryParamException + to IllegalArgumentException and response it as http BAD_REQUEST in WebHDFS. + (szetszwo) + + HDFS-2424. Added a root element "HdfsFileStatuses" for the response + of WebHDFS listStatus. (szetszwo) + + MAPREDUCE-2764. Fix renewal of dfs delegation tokens. (Owen via jitendra) + + HDFS-2439. 
Fix NullPointerException in WebHDFS when opening a non-existing + file or creating a file without specifying the replication parameter. + (szetszwo) + + HDFS-2453. Fix http response code for partial content in WebHDFS, added + getDefaultBlockSize() and getDefaultReplication() in WebHdfsFileSystem + and cleared content type in ExceptionHandler. (szetszwo) + + HDFS-2411. The the auth to local mappings are not being respected, with + WebHDFS enabled. (jitendra) + + HDFS-2494. Close the streams and DFSClient in DatanodeWebHdfsMethods. + (Uma Maheswara Rao G via szetszwo) + + HDFS-2298. Fix TestDfsOverAvroRpc by changing ClientProtocol to + not include multiple methods of the same name. (cutting) + + HDFS-2432. WebHDFS: response FORBIDDEN when setReplication on non-files; + clear umask before creating a flie; throw IllegalArgumentException if + setOwner with both owner and group empty; throw FileNotFoundException if + getFileStatus on non-existing files; fix bugs in getBlockLocations; and + changed getFileChecksum json response root to "FileChecksum". (szetszwo) + + HDFS-2065. Add null checks in DFSClient.getFileChecksum(..). (Uma + Maheswara Rao G via szetszwo) + + HDFS-2522. Disable TestDfsOverAvroRpc test. (suresh) + + HDFS-2416. distcp with a WebHDFS uri on a secure cluster fails. (jitendra) + + HDFS-2527. WebHDFS: remove the use of "Range" header in Open; use ugi + username if renewer parameter is null in GetDelegationToken; response OK + when setting replication for non-files; rename GETFILEBLOCKLOCATIONS to + GET_BLOCK_LOCATIONS and state that it is a private unstable API; replace + isDirectory and isSymlink with enum {FILE, DIRECTORY, SYMLINK} in + HdfsFileStatus JSON object. (szetszwo) + + HDFS-2528. WebHDFS: set delegation kind to WEBHDFS and add a HDFS token + when http requests are redirected to datanode. (szetszwo) + + HDFS-2540. WebHDFS: change "Expect: 100-continue" to two-step write; change + "HdfsFileStatus" and "localName" respectively to "FileStatus" and + "pathSuffix" in JSON response. (szetszwo) + + BREAKDOWN OF HDFS-1073 SUBTASKS + + HDFS-1521. Persist transaction ID on disk between NN restarts. + (Ivan Kelly and Todd Lipcon via todd) + + HDFS-1538. Refactor more startup and image loading code out of FSImage. + (todd) + + HDFS-1729. Add code to detect valid length of an edits file. (todd) + + HDFS-1793. Add code to inspect a storage directory with txid-based + filenames (todd) + + HDFS-1794. Add code to list which edit logs are available on a remote NN + (todd) + + HDFS-1858. Add state management variables to FSEditLog (Ivan Kelly and Todd + Lipcon via todd) + + HDFS-1859. Add some convenience functions to iterate over edit log streams + (Ivan Kelly and Todd Lipcon via todd) + + HDFS-1894. Add constants for LAYOUT_VERSIONs in edits log branch (todd) + + HDFS-1892. Fix EditLogFileInputStream.getValidLength to be aware of + OP_INVALID filler (todd) + + HDFS-1799. Refactor log rolling and filename management out of FSEditLog + (Ivan Kelly and Todd Lipcon via todd) + + HDFS-1801. Remove use of timestamps to identify checkpoints and logs (todd) + + HDFS-1930. TestDFSUpgrade failing in HDFS-1073 branch (todd) + + HDFS-1800. Extend image checksumming to function with multiple fsimage + files per directory. (todd) + + HDFS-1725. Set storage directories only at FSImage construction (Ivan Kelly + via todd) + + HDFS-1926. Remove references to StorageDirectory from JournalManager + interface (Ivan Kelly via todd) + + HDFS-1893. 
Change edit logs and images to be named based on txid (todd) + + HDFS-1985. Clean up image transfer servlet (todd) + + HDFS-1984. Enable multiple secondary namenodes to run simultaneously (todd) + + HDFS-1987. Re-enable TestCheckpoint.testSecondaryImageDownload which was + not running previously. (todd) + + HDFS-1993. TestCheckpoint needs to clean up between cases (todd) + + HDFS-1992. Remove vestiges of NNStorageListener. (todd) + + HDFS-1991. Some refactoring of Secondary NameNode to be able to share more + code with the BackupNode or CheckpointNode. (todd) + + HDFS-1994. Fix race conditions when running two rapidly checkpointing + Secondary NameNodes. (todd) + + HDFS-2001. Remove use of previous.checkpoint and lastcheckpoint.tmp + directories (todd) + + HDFS-2015. Remove checkpointTxId from VERSION file. (todd) + + HDFS-2016. Add infrastructure to remove or archive old and unneeded storage + files within the name directories. (todd) + + HDFS-2047. Improve TestNamespace and TestEditLog in HDFS-1073 branch. + (todd) + + HDFS-2048. Add upgrade tests and fix upgrade from 0.22 with corrupt image. + (todd) + + HDFS-2027. Image inspector should return finalized logs before unfinalized + logs. (todd) + + HDFS-2074. Determine edit log validity by truly reading and validating + transactions. (todd) + + HDFS-2085. Finalize in-progress edit logs at startup. (todd) + + HDFS-2026. SecondaryNameNode should properly handle the case where the + NameNode is reformatted. (todd) + + HDFS-2077. Address checkpoint upload when one of the storage dirs is failed + (todd) + + HDFS-2078. NameNode should not clear directory when restoring removed + storage. (todd) + + HDFS-2088. Move edits log archiving logic into FSEditLog/JournalManager + (todd) + + HDFS-2093. Handle case where an entirely empty log is left during NN crash + (todd) + + HDFS-2102. Zero-pad edits filename to make them lexically sortable. (Ivan + Kelly via todd) + + HDFS-2010. Fix NameNode to exit if all edit streams become inaccessible. + (atm via todd) + + HDFS-2123. Checkpoint interval should be based on txn count, not size. + (todd) + + HDFS-1979. Fix backupnode for new edits/image layout. (todd) + + HDFS-2101. Fix remaining unit tests for new storage filenames. (todd) + + HDFS-2133. Address remaining TODOs and pre-merge cleanup on HDFS-1073 + branch. (todd) + + HDFS-1780. Reduce need to rewrite FSImage on startup. (todd) + + HDFS-2104. Add a flag to the 2NN to format its checkpoint dirs on startup. + (todd) + + HDFS-2135. Fix regression of HDFS-1955 in HDFS-1073 branch. (todd) + + HDFS-2160. Fix CreateEditsLog test tool in HDFS-1073 branch. (todd) + + HDFS-2168. Reenable TestEditLog.testFailedOpen and fix exposed bug. (todd) + + HDFS-2169. Clean up TestCheckpoint and remove TODOs (todd) + + HDFS-2170. Address remaining TODOs in HDFS-1073 branch. (todd) + + HDFS-2172. Address findbugs and javadoc warnings in HDFS-1073 branch. + (todd) + + HDFS-2445. Ensure failed tests exit with proper error code. (Jonathan + Eagles via acmurthy) + +Release 0.22.1 - Unreleased + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + HDFS-2718. Optimize OP_ADD in edits loading. (shv) + + HDFS-2886. CreateEditLogs should generate a realistic edit log. (shv) + + BUG FIXES + + HDFS-2877. If locking of a storage dir fails, it will remove the other + NN's lock file on exit. (todd) + +Release 0.22.0 - 2011-11-29 + + INCOMPATIBLE CHANGES + + HDFS-1825. Remove thriftfs contrib. (nigel via eli) + + NEW FEATURES + + HDFS-992. 
Re-factor block access token implementation to conform to the + generic Token interface in Common (Kan Zhang and Jitendra Pandey via jghoman) + + HDFS-599. Allow NameNode to have a separate port for service requests from + client requests. (Dmytro Molkov via hairong) + + HDFS-1004. Update NN to support Kerberized SSL from HADOOP-6584. + (jghoman and Kan Zhang via jghoman) + + HDFS-1005. Fsck security. (borya and Kan Zhang via jghoman) + + HDFS-1006. getImage/putImage http requests should be https for the case + of security enabled. (borya and jghoman via jghoman) + + HDFS-1033. In secure clusters, NN and SNN should verify that the remote + principal during image and edits transfer. (jghoman) + + HDFS-1023. Allow http server to start as regular principal if https + principal not defined. (jghoman) + + HDFS-1150. Verify datanodes' identities to clients in secure clusters. + (jghoman) + + HDFS-1330. Make RPCs to DataNodes timeout. (hairong) + Added additional unit tests per HADOOP-6889. (John George via mattf) + + HDFS-202. HDFS support of listLocatedStatus introduced in HADOOP-6870. + HDFS piggybacks block locations to each file status when listing a + directory. (hairong) + + HDFS-1361. Add -fileStatus operation to NNThroughputBenchmark. (shv) + + HDFS-1435. Provide an option to store fsimage compressed. (hairong) + + HDFS-903. Support fsimage validation through MD5 checksum. (hairong) + + HDFS-1457. Provide an option to throttle image transmission between + primary and secondary NameNodes. (Yifei Lu and hairong via hairong) + + HDFS-1164. TestHdfsProxy is failing. (Todd Lipcon via cos) + + HDFS-811. Add metrics, failure reporting and additional tests for HDFS-457. + (eli) + + HDFS-895. Allow hflush/sync to occur in parallel with new writes + to the file. (Todd Lipcon via hairong) + + HDFS-528. Add ability for safemode to wait for a minimum number of + live datanodes (Todd Lipcon via eli) + + HDFS-1753. Resource Leak in StreamFile. (Uma Maheswara Rao G via eli) + + IMPROVEMENTS + + HDFS-1304. Add a new unit test for HftpFileSystem.open(..). (szetszwo) + + HDFS-1096. fix for prev. commit. (boryas) + + HDFS-1096. allow dfsadmin/mradmin refresh of superuser proxy group + mappings (boryas) + + HDFS-1146. Javadoc for getDelegationTokenSecretManager in FSNamesystem (jnp via boryas) + + HDFS-1132. Refactor TestFileStatus (Eli Collins via cos) + + HDFS-1163. normalize property names for JT/NN kerberos principal + names in configuration (from HADOOP 6633) (boryas) + + HDFS-1003. authorization checks for inter-server protocol + (based on HADOOP-6600) (boryas) + + HDFS-1061. Memory footprint optimization for INodeFile object. + (Bharath Mundlapudi via jghoman) + + HDFS-1079. Throw exceptions as specified by the AbstractFileSystem + in HDFS implementation and protocols. (suresh) + + HDFS-1112. Edit log buffer should not grow unboundedly. (hairong) + + HDFS-1119. Introduce a GSet interface to BlocksMap. (szetszwo) + + HDFS-1184. Replace tabs in code with spaces. (Jeff Ames via jghoman) + + HDFS-1185. Remove duplicate now() functions in DataNode, FSNamesystem. + (Jeff Ames via jghoman) + + HDFS-1183. Remove some duplicate code in NamenodeJspHelper.java. + (Jeff Ames via jghoman) + + HDFS-1190. Remove unused getNamenode() method from DataNode. + (Jeff Ames via jghoman) + + HDFS-1110. Reuses objects for commonly used file names in namenode to + reduce the heap usage. (suresh) + + HDFS-752. Add interfaces classification to HDFS source code. (suresh) + + HDFS-947.
An Hftp read request is redirected to a datanode that has + the most replicas of the blocks in the file. (Dmytro Molkov via dhruba) + + HDFS-1272. Fixes to take care of the changes in HADOOP-6845. + (Jitendra Pandey via ddas) + + HDFS-1298 - Add support in HDFS for new statistics added in FileSystem + to track the file system operations. (suresh) + + HDFS-1201. The HDFS component for HADOOP-6632. + (Kan Zhang & Jitendra Pandey via ddas) + + HDFS-1307 Add start time, end time and total time taken for FSCK to + FSCK report (suresh) + + HDFS-1302. The HDFS side of the changes corresponding to HADOOP-6861. + (Jitendra Pandey & Owen O'Malley via ddas) + + HDFS-1315. Add fsck event to audit log and remove other audit log events + corresponding to FSCK listStatus and open calls. (suresh) + + HDFS-1178. The NameNode servlets should not use RPC to connect to the + NameNode. (Kan Zhang via jghoman) + + HDFS-1130. Adds dfs.cluster.administrator ACL configuration that can + be used to control who can view the default hdfs servlets. (ddas) + + HDFS-1297. Fix some comments. (Jeff Ames via jghoman) + + HDFS-330. Datanode Web UIs should provide robots.txt. + (Allen Wittenauer via jghoman) + + HDFS-881. Refactor DataNode Packet header into DataTransferProtocol. + (Todd Lipcon via jghoman) + + HDFS-1036. docs for fetchdt + + HDFS-1318. Add JMX interface for read access to namenode and datanode + web UI information. (Tanping Wang via suresh). + + HDFS-1356. Provide information as to whether or not security is + enabled on web interface for NameNode (boryas) + + HDFS-1205. FSDatasetAsyncDiskService should name its threads. + (Todd Lipcon via eli) + + HDFS-1111. Introduce getCorruptFileBlocks() for fsck. (Sriram Rao via shv) + + HDFS-1395. Add @Override to FSDataset methods that implement + FSDatasetInterface methods. (suresh) + + HDFS-1383. Improve the error messages when using hftp://. (szetszwo) + + HDFS-1093. Change the FSNamesystem lock to a read/write lock. (dhruba) + + HDFS-1407. Change DataTransferProtocol methods to use Block instead + of individual elements of Block. (suresh) + + HDFS-1417. Add @Override to SimulatedFSDataset methods that implement + FSDatasetInterface methods. (suresh) + + HDFS-1426. Remove unused method BlockInfo#listCount. (hairong) + + HDFS-1472. Allow programmatic access to fsck output. + (Ramkumar Vadali via dhruba) + + HADOOP-7007. Update the hudson-test-patch ant target to work with the + latest test-patch.sh script (gkesavan) + + HDFS-1462. Refactor edit log loading to a separate class from edit log writing. + (Todd Lipcon via eli) + + HDFS-1485. Fix typo in BlockPlacementPolicy. (Jingguo Yao via shv) + + HDFS-1035. Generate Eclipse's .classpath file from Ivy config. (nigel) + + HDFS-1408. Herriot NN and DN clients should vend statistics. (cos) + + HDFS-1491 Update Hdfs to match the change of methods from protected to public + in AbstractFileSystem (Hadoop-6903) (sanjay) + + HDFS-1160. Improve some FSDataset warnings and comments. (eli) + + HDFS-556. Provide info on failed volumes in the web ui. (eli) + + HDFS-697. Enable asserts for tests by default. (eli) + + HDFS-1187. Modify fetchdt to allow renewing and canceling token. + (Owen O'Malley and Kan Zhang via jghoman) + + HDFS-1387. Update HDFS permissions guide for security. (Todd Lipcon via eli) + + HDFS-455. Make NN and DN handle in a intuitive way comma-separated + configuration strings. (Michele Catasta via eli) + + HDFS-1071. 
savenamespace should write the fsimage to all configured + fs.name.dir in parallel (Dmytro Molkov via jghoman) + + HDFS-1055. Improve thread naming for DataXceivers. + (Todd Lipcon and Ramkumar Vadali via eli). + + HDFS-718. Configuration parameter to prevent accidental formatting of + HDFS filesystem. (Andrew Ryan via jghoman) + + HDFS-1500. TestOfflineImageViewer failing on trunk. (Todd Lipcon + via hairong) + + HDFS-1483. DFSClient.getBlockLocations should indicate if corresponding + blocks are corrupt. (Patrick Kling via hairong) + + HDFS-259. Remove intentionally corrupt 0.13 directory layout creation. + (Todd Lipcon via eli) + + HDFS-1513. Fix a number of warnings. (eli) + + HDFS-1473. Refactor storage management into separate classes than fsimage + file reading/writing. (Todd Lipcon via eli) + + HDFS-1582. Remove auto-generated native build files. (rvs via eli) + + HDFS-1456. Provide builder for constructing instances of MiniDFSCluster. + (jghoman) + + HDFS-1861. Rename dfs.datanode.max.xcievers and bump its default value. + (eli) + + HDFS-1052. HDFS Federation - Merge of umbrella jira changes from + HDFS-1052 branch into trunk. + + HDFS-1835. DataNode should not depend on SHA1PRNG secure random generator + to generate a storage ID. (John Carrino via todd) + + HDFS-1947. DFSClient should use mapreduce.task.attempt.id. (eli) + + HDFS-1957. Add documentation for HFTP. (Ari Rabkin via todd) + + HDFS-1454. Update the documentation to reflect that clients don't write + blocks to local disk before copying to HDFS. (Harsh J Chouraria via todd) + + HDFS-1980. Move build/webapps deeper in the build directory heirarchy + to aid eclipse users. (todd) + + HDFS-1619. Remove AC_TYPE* from the libhdfs. (Roman Shaposhnik via eli) + + HDFS-1948 Forward port 'hdfs-1520 lightweight namenode operation to + trigger lease recovery' (stack) + + HDFS-1954. Improved corrupt files warning on NameNode web UI. + (Patrick Hunt via shv) + + HDFS-1409. BackupNode registration throwing + UnsupportedActionException("register") instead of "journal". + (Ching-Shen Chen via shv) + + HDFS-2054 BlockSender.sendChunk() prints ERROR for connection closures + encountered during transferToFully() (Kihwal Lee via stack) + + OPTIMIZATIONS + + HDFS-1140. Speedup INode.getPathComponents. (Dmytro Molkov via shv) + + HDFS-1081. Performance regression in + DistributedFileSystem::getFileBlockLocations in secure systems (jghoman) + + HDFS-1114. Implement LightWeightGSet for BlocksMap in order to reduce + NameNode memory footprint. (szetszwo) + + HDFS-1320. Add LOG.isDebugEnabled() guard for each LOG.debug(..). + (Erik Steffl via szetszwo) + + HDFS-1368. Add a block counter to DatanodeDescriptor. (hairong) + + HDFS-1434. Refactor Datanode#startDataNode method into smaller methods. + (suresh) + + HDFS-941. The DFS client should cache and reuse open sockets to datanodes + while performing reads. (bc Wong and Todd Lipcon via todd) + + BUG FIXES + + HDFS-1039. Adding test for JspHelper.getUGI(jnp via boryas) + + HDFS-1019. Incorrect default values for delegation tokens in + hdfs-default.xml (jnp via boryas) + + HDFS-1039. Service should be set in the token in JspHelper.getUGI(jnp via boryas) + + HDFS-1038. FIX. A test missed in a previous commit for this JIRA. (boryas) + + HDFS-1038. In nn_browsedfscontent.jsp fetch delegation token only + if security is enabled. (jnp via boryas) + + HDFS-1044. Cannot submit mapreduce job from secure client to + unsecure sever (boryas) + + HDFS-1021. 
specify correct server principal for RefreshAuthorizationPolicyProtocol + and RefreshUserToGroupMappingsProtocol protocols in DFSAdmin (for HADOOP-6612) (boryas) + + HDFS-970. fsync fsimage to disk before closing fsimage file. + (Todd Lipcon via dhruba) + + HDFS-1027. Update copyright year to 2010. (Ravi Phulari via jghoman) + + HDFS-1080. SecondaryNameNode image transfer should use the defined http + address rather than local ip address. (jghoman) + + HDFS-1198. Resolving cross-realm principals. (Jitendra Pandey via jghoman) + + HDFS-1118. Fix socketleak on DFSClient. (Zheng Shao via dhruba) + + HDFS-1192. refreshSuperUserGroupsConfiguration should use server side + configuration for the refresh (for HADOOP-6815) (boryas) + + HDFS-1036. in DelegationTokenFetch dfs.getURI returns no port (boryas) + + HDFS-1017. browsedfs jsp should call JspHelper.getUGI rather + than using createRemoteUser() (jnp via boryas) + + HDFS-1250. Namenode should reject block reports and block received + requests from dead datanodes (suresh) + + HDFS-1145. When NameNode is shutdown it does not try to exit + safemode anymore. (dhruba) + + HDFS-1202. DataBlockScanner throws NPE when updated before + initialized. (Todd Lipcon via dhruba) + + HDFS-882. Datanode logs the hostname and port its listening on. + (Steve Loughran via dhruba) + + HDFS-1238. ant eclipse-files has drifted again, (jghoman) + + HDFS-1045. In secure clusters, re-login is necessary for https + clients before opening connections. (jghoman) + + HDFS-1289. Datanode secure mode is broken. (Kan Zhang via jghoman) + + HDFS-1007. HFTP needs to be updated to use delegation tokens (boryas) + + HDFS-1085. HFTP read may fail silently on the client side if there is an + exception on the server side. (szetszwo) + + HDFS-1308. job conf key for the services name of DelegationToken for HFTP + url is constructed incorrectly in HFTPFileSystem (boryas) + + HDFS-1319. Fix location of re-login for secondary namenode from HDFS-999. + (jghoman) + + HDFS-1317. Remove the FILEPATH_PATTERN from hdfsproxy.AuthorizationFilter. + (Rohini Palaniswamy via szetszwo) + + HDFS-912. sed in build.xml on Solaris fails. (Allen Wittenauer via jghoman) + + HDFS-1296. using delegation token over hftp for long running + clients (boryas) + + HDFS-1334. open in HftpFileSystem does not add delegation tokens to the url. + (Jitendra Pandey via jghoman) + + HDFS-1301. TestHDFSProxy need to use server side conf for ProxyUser + stuff. (boryas) + + HDFS-1340. When security is turned off, there is a potential XSS attack. + This patch fixes it by removing delegationtoken string from the URL, + before returning a response to the client. (Jitendra Pandey via ddas) + + HDFS-1347. TestDelegationToken uses mortbay.log for logging (boryas) + + HDFS-1157. Modifications introduced by HDFS-1150 are breaking aspect's + bindings (cos) + + HDFS-1349. Remove empty java files. (Eli Collins) + + HDFS-1340. A null delegation token is appended to the url if security + is disabled when browsing filesystem. (boryas) + + HDFS-1352. Fix jsvc.location. (Eli Collins via jghoman) + + HDFS-1284. TestBlockToken fails. (Kan Zhang via jghoman) + + HDFS-1355. ant veryclean (clean-cache) doesn't clean enough. + (Luke Lu via jghoman) + + HDFS-1353. Remove most of getBlockLocation optimization. (jghoman) + + HDFS-1369. Invalid javadoc reference in FSDatasetMBean.java (Eli Collins) + + HDFS-829. hdfsJniHelper.c: #include is not portable. + (Allen Wittenauer via jghoman) + + HDFS-1310. 
The ClientDatanodeProtocol proxy should be stopped in + DFSInputStream.readBlockLength(..). (sam rash via szetszwo) + + HDFS-1357. HFTP traffic served by DataNode shouldn't use service port + on NameNode. (Kan Zhang via jghoman) + + HDFS-1419. HDFS Federation: Three test cases need minor modification after + the new block id change (Tanping Wang via suresh) + + HDFS-96. HDFS supports blocks larger than 2 GB. + (Patrick Kling via dhruba) + + HDFS-1433. Fix test failures - TestPread and TestFileLimit. (suresh) + + HDFS-1364. Makes long running HFTP-based applications do relogins + if necessary. (Jitendra Pandey via ddas) + + HDFS-1399. Distinct minicluster services (e.g. NN and JT) overwrite each + other's service policies. (Aaron T. Myers via tomwhite) + + HDFS-1440. Fix TestComputeInvalidateWork failure. (suresh) + + HDFS-1498. FSDirectory#unprotectedConcat calls setModificationTime + on a file. (eli) + + HDFS-1625. Ignore disk space values in TestDataNodeMXBean. (szetszwo) + + HDFS-1850. DN should transmit absolute failed volume count rather than + increments to the NN. (eli) + + HDFS-671. Documentation change for updated configuration keys. + (tomwhite via eli) + + HDFS-1544. Ivy resolve force mode should be turned off by default. + (Luke Lu via tomwhite) + + HDFS-1615. seek() on closed DFS input stream throws NullPointerException + (Scott Carey via todd) + + HDFS-1897. Documentation refers to removed option dfs.network.script + (Andrew Whang via todd) + + HDFS-1621. Fix references to hadoop-common-${version} in build.xml + (Jolly Chen via todd) + + HDFS-1505. saveNamespace appears to succeed even if all directories fail + to save. (Aaron T. Myers via todd) + + HDFS-1921. saveNamespace can cause NN to be unable to come up on restart + (Matt Foley via todd) + + HDFS-1925. SafeModeInfo should use the correct constant instead of a + hard-coded value for its default. (Joey Echeverria via todd) + + HDFS-1575. Viewing block from web UI is broken. (Aaron T. Myers via todd) + + HDFS-1932. Ensure that HDFS configuration deprecations are set up in every + spot that HDFS configurations are loaded. (Jolly Chen via todd) + + HDFS-1952. FSEditLog.open() appears to succeed even if all EDITS + directories fail. (Andrew Wang via todd) + + HDFS-1965. IPCs done using block token-based tickets can't reuse + connections (todd) + + HDFS-1978. All but first option in LIBHDFS_OPTS is ignored. (eli) + + HDFS-1964. Fix incorrect HTML unescaping in DatanodeJspHelper + (Aaron T. Myers via todd) + + HDFS-1997. Image transfer process misreports client side exceptions. + (todd via eli) + + HDFS-2000. Missing deprecation for io.bytes.per.checksum. + (Aaron T. Myers vie eli) + + HDFS-977. DataNode.createInterDataNodeProtocolProxy() guards a log + at the wrong level. (Harsh J Chouraria via todd) + + HDFS-1969. Running rollback on new-version namenode destroys the + namespace. (todd) + + HDFS-2039. TestNameNodeMetrics uses a bad test root path, preventing it + from running inside Eclipse. (todd) + + HDFS-988. saveNamespace race can corrupt the edits log. (eli) + + HDFS-2071. Use of isConnected() in DataXceiver is invalid. (Kihwal Lee + via todd) + + HDFS-1981. NameNode does not saveNamespace() when editsNew is empty. + (Uma Maheswara Rao G via shv) + + HDFS-2232. Generalize regular expressions in TestHDFSCLI. + (Plamen Jeliazkov via shv) + + HDFS-2290. Block with corrupt replica is not getting replicated. + (Benoy Antony via shv) + + HDFS-2452. 
OutOfMemoryError in DataXceiverServer takes down the DataNode + (Uma Maheswara Rao via cos) + + HDFS-2002. Incorrect computation of needed blocks in getTurnOffTip(). + (Plamen Jeliazkov via shv) + + HDFS-2514. Link resolution bug for intermediate symlinks with + relative targets. (eli) + +Release 0.21.1 - Unreleased + + HDFS-1466. TestFcHdfsSymlink relies on /tmp/test not existing. (eli) + + HDFS-874. TestHDFSFileContextMainOperations fails on weirdly + configured DNS hosts. (Todd Lipcon via eli) + + HDFS-1507. TestAbandonBlock should abandon a block. (eli) + + HDFS-1487. FSDirectory.removeBlock() should update diskspace count + of the block owner node (Zhong Wang via eli). + + HDFS-1467. Append pipeline never succeeds with more than one replica. + (Todd Lipcon via eli) + + HDFS-1167. New property for local conf directory in system-test-hdfs.xml + file. (Vinay Thota via cos) + + HDFS-1503. TestSaveNamespace fails. (Todd Lipcon via cos) + + HDFS-1524. Image loader should make sure to read every byte in image file. + (hairong) + + HDFS-1523. TestLargeBlock is failing on trunk. (cos) + + HDFS-1502. TestBlockRecovery triggers NPE in assert. (hairong via cos) + + HDFS-1532. Exclude Findbugs warning in FSImageFormat$Saver. (Todd Lipcon + via cos) + + HDFS-1527. SocketOutputStream.transferToFully fails for blocks >= 2GB on + 32 bit JVM. (Patrick Kling via cos) + + HDFS-1531. Clean up stack traces due to duplicate MXBean registration. + (Todd Lipcon via cos) + + HDFS-613. TestBalancer and TestBlockTokenWithDFS fail Balancer assert. + (Todd Lipcon via cos) + + HDFS-1511. 98 Release Audit warnings on trunk and branch-0.22. + (jghoman) + + HDFS-1560. dfs.data.dir permissions should default to 700. + (Todd Lipcon via eli) + + HDFS-1550. NPE when listing a file with no location. (hairong) + + HDFS-1542. Add test for HADOOP-7082, a deadlock writing Configuration to + HDFS. (todd) + + HDFS-1504. FSImageSaver should catch all exceptions, not just IOE. (todd) + + HDFS-884. DataNode throws IOException if all data directories are + unavailable. (Steve Loughran and shv) + + HDFS-1591. HDFS part of HADOOP-6642. (Chris Douglas, Po Cheung via shv) + + HDFS-900. Corrupt replicas are not processed correctly in block report (shv) + + HDFS-1529. Incorrect handling of interrupts in waitForAckedSeqno can cause + deadlock (todd) + + HDFS-1597. Batched edit log syncs can reset synctxid and throw assertions + (todd) + + HDFS-1602. Fix HADOOP-4885 for it is doesn't work as expected. (boryas) + + HDFS-1618. configure files that are generated as part of the released + tarball need to have executable bit set (Roman Shaposhnik via cos) + + HDFS-981. test-contrib fails due to test-cactus failure (cos) + + HDFS-1001. DataXceiver and BlockReader disagree on when to send/recv + CHECKSUM_OK. (bc Wong via eli) + + HDFS-1781. Fix the path for jsvc in bin/hdfs. (John George via szetszwo) + + HDFS-1782. Fix an NPE in FSNamesystem.startFileInternal(..). + (John George via szetszwo) + + HDFS-1821. Fix username resolution in NameNode.createSymlink(..) and + FSDirectory.addSymlink(..). (John George via szetszwo) + + HDFS-1806. TestBlockReport.blockReport_08() and _09() are timing-dependent + and likely to fail on fast servers. (Matt Foley via eli) + + HDFS-1845. Symlink comes up as directory after namenode restart. + (John George via eli) + + HDFS-1666. Disable failing hdfsproxy test TestAuthorizationFilter (todd) + + HDFS-1823. start-dfs.sh script fails if HADOOP_HOME is not set. 
+ (tomwhite via eli) + +Release 0.21.1 - Unreleased + + HDFS-1411. Correct backup node startup command in hdfs user guide. + (Ching-Shen Chen via shv) + + BUG FIXES + + HDFS-1363. Eliminate second synchronized sections in appendFile(). (shv) + + HDFS-1413. Fix broken links to HDFS Wiki. (shv) + + HDFS-1420. Clover build doesn't generate per-test coverage (cos) + + HDFS-1444. Test related code of build.xml is error-prone and needs to be + re-aligned. (cos) + + HDFS-1343. Instrumented build should be concentrated in one build area (cos) + + HDFS-1452. ant compile-contrib is broken (cos) + + HDFS-1474. ant binary-system is broken (cos) + + HDFS-1292. Allow artifacts to be published to the staging Apache Nexus + Maven Repository. (Giridharan Kesavan via tomwhite) + + HDFS-1552. Remove java5 dependencies from build. (cos) + + HDFS-1189. Quota counts missed between clear quota and set quota. + (John George via szetszwo) + + HDFS-1665. Balancer misuses dfs.heartbeat.interval as milliseconds. + (szetszwo) + + HDFS-1728. SecondaryNameNode.checkpointSize is in bytes but not in MB. + (szetszwo) + + HDFS-1206. TestFiHFlush fails intermittently. (cos) + + HDFS-1548. Fault-injection tests are executed multiple times if invoked + with run-test-hdfs-fault-inject target (cos) + + HDFS-1552. Remove java5 dependencies from build. (cos) + + HDFS-996. JUnit tests should never depend on anything in conf (cos) + + HDFS-1612. Update HDFS design documentation for append, quota, symlink, + block placement and checkpoint/backup node features. (Joe Crobak + via szetszwo) + + + HDFS-1596. Replace fs.checkpoint.* with dfs.namenode.checkpoint.* + in documentations. (Harsh J Chouraria via szetszwo) + + HDFS-1786. Some cli test cases expect a "null" message + (Uma Maheswara Rao G via todd) + + HDFS-1855. TestDatanodeBlockScanner.testBlockCorruptionRecoveryPolicy() + part 2 fails in two different ways. (Matt Foley via eli) + +Release 0.21.0 - 2010-08-13 + + INCOMPATIBLE CHANGES + + HDFS-538. Per the contract elucidated in HADOOP-6201, throw + FileNotFoundException from FileSystem::listStatus rather than returning + null. (Jakob Homan via cdouglas) + + HDFS-602. DistributedFileSystem mkdirs throws FileAlreadyExistsException + instead of FileNotFoundException. (Boris Shkolnik via suresh) + + HDFS-544. Add a "rbw" subdir to DataNode data directory. (hairong) + + HDFS-576. Block report includes under-construction replicas. (shv) + + HDFS-636. SafeMode counts complete blocks only. (shv) + + HDFS-644. Lease recovery, concurrency support. (shv) + + HDFS-570. Get last block length from a data-node when opening a file + being written to. (Tsz Wo (Nicholas), SZE via shv) + + HDFS-657. Remove unused legacy data-node protocol methods. (shv) + + HDFS-658. Block recovery for primary data-node. (shv) + + HDFS-660. Remove deprecated methods from InterDatanodeProtocol. (shv) + + HDFS-512. Block.equals() and compareTo() compare blocks based + only on block Ids, ignoring generation stamps. (shv) + + HDFS-873. Configuration specifies data-node storage directories as URIs. + (shv) + + HDFS-905. Use the new UserGroupInformation from HDFS-6299. + (jghoman via omalley) + + HDFS-984. Persistent delegation tokens. (Jitendra Pandey via shv) + + HDFS-1016. HDFS side change for HADOOP-6569. This jira changes the + error message on the screen when cat a directory or a + non-existent file. (hairong) + + NEW FEATURES + + HDFS-1134. Large-scale Automated Framework. (cos) + + HDFS-436. Introduce AspectJ framework for HDFS code and tests. 
+ (Konstantin Boudnik via szetszwo) + + HDFS-447. Add LDAP lookup to hdfsproxy. (Zhiyong Zhang via cdouglas) + + HDFS-459. Introduce Job History Log Analyzer. (shv) + + HDFS-461. Tool to analyze file size distribution in HDFS. (shv) + + HDFS-492. Add two JSON JSP pages to the Namenode for providing corrupt + blocks/replicas information. (Bill Zeller via szetszwo) + + HDFS-578. Add support for new FileSystem method for clients to get server + defaults. (Kan Zhang via suresh) + + HDFS-595. umask settings in configuration may now use octal or symbolic + instead of decimal. (Jakob Homan via suresh) + + HADOOP-6234. Updated hadoop-core and test jars to propagate new option + dfs.umaskmode in configuration. (Jakob Homan via suresh) + + HDFS-235. Add support for byte ranges in HftpFileSystem to serve + range of bytes from a file. (Bill Zeller via suresh) + + HDFS-385. Add support for an experimental API that allows a module external + to HDFS to specify how HDFS blocks should be placed. (dhruba) + + HADOOP-4952. Update hadoop-core and test jars to propagate new FileContext + file system application interface. (Sanjay Radia via suresh). + + HDFS-567. Add block forensics contrib tool to print history of corrupt and + missing blocks from the HDFS logs. + (Bill Zeller, Jitendra Nath Pandey via suresh). + + HDFS-610. Support o.a.h.fs.FileContext. (Sanjay Radia via szetszwo) + + HDFS-536. Support hflush at DFSClient. (hairong) + + HDFS-517. Introduce BlockInfoUnderConstruction to reflect block replica + states while writing. (shv) + + HDFS-565. Introduce block committing logic during new block allocation + and file close. (shv) + + HDFS-537. DataNode exposes a replica's meta info to BlockReceiver for the + support of dfs writes/hflush. It also updates a replica's bytes received, + bytes on disk, and bytes acked after receiving a packet. (hairong) + + HDFS-585. Datanode should serve up to visible length of a replica for read + requests. (szetszwo) + + HDFS-604. Block report processing for append. (shv) + + HDFS-619. Support replica recovery initialization in datanode for the new + append design. (szetszwo) + + HDFS-592. Allow clients to fetch a new generation stamp from NameNode for + pipeline recovery. (hairong) + + HDFS-624. Support a new algorithm for pipeline recovery and pipeline setup + for append. (hairong) + + HDFS-627. Support replica update in data-node. + (Tsz Wo (Nicholas), SZE and Hairong Kuang via shv) + + HDFS-642. Support pipeline close and close error recovery. (hairong) + + HDFS-631. Rename configuration keys towards API standardization and + backward compatibility. (Jitendra Nath Pandey via suresh) + + HDFS-669. Add unit tests framework (Mockito) (cos, Eli Collins) + + HDFS-731. Support new Syncable interface in HDFS. (hairong) + + HDFS-702. Add HDFS implementation of AbstractFileSystem. + (Sanjay Radio via suresh) + + HDFS-758. Add decommissioning status page to Namenode Web UI. + (Jitendra Nath Pandey via suresh) + + HDFS-814. Add an api to get the visible length of a DFSDataInputStream. + (szetszwo) + + HDFS-654. Add support new atomic rename functionality in HDFS for + supporting rename in FileContext. (suresh) + + HDFS-222. Support for concatenating of files into a single file + without copying. (Boris Shkolnik via hairong) + + HDFS-933. Adds Delegation token based authentication in the NameNode. + (Kan Zhang via ddas) + + HDFS-935. Adds a real user component in Delegation token. + (Jitendra Nath Pandey via ddas) + + HDFS-245. Adds a symlink implementation to HDFS. 
This complements the new + symlink feature added in HADOOP-6421 (Eli Collins via Sanjay Radia) + + HDFS-1009. Support Kerberos authorization in HDFSProxy. (Srikanth + Sundarrajan via szetszwo) + + HDFS-1091. Implement listStatus that returns an iterator of FileStatus. + (hairong) + + IMPROVEMENTS + + HDFS-381. Remove blocks from DataNode maps when corresponding file + is deleted. (Suresh Srinivas via rangadi) + + HDFS-377. Separate codes which implement DataTransferProtocol. + (szetszwo) + + HDFS-396. NameNode image and edits directories are specified as URIs. + (Luca Telloli via rangadi) + + HDFS-444. Allow to change probability levels dynamically in the fault + injection framework. (Konstantin Boudnik via szetszwo) + + HDFS-352. Documentation for saveNamespace command. (Ravi Phulari via shv) + + HADOOP-6106. Updated hadoop-core and test jars from hudson trunk + build #12. (Giridharan Kesavan) + + HDFS-204. Add a new metrics FilesInGetListingOps to the Namenode. + (Jitendra Nath Pandey via szetszwo) + + HDFS-278. HDFS Outputstream close does not hang forever. (dhruba) + + HDFS-443. Add a new metrics numExpiredHeartbeats to the Namenode. + (Jitendra Nath Pandey via szetszwo) + + HDFS-475. Add new ant targets for fault injection jars and tests. + (Konstantin Boudnik via szetszwo) + + HDFS-458. Create a new ant target, run-commit-test. (Jakob Homan + via szetszwo) + + HDFS-493. Change build.xml so that the fault-injected tests are executed + only by the run-test-*-fault-inject targets. (Konstantin Boudnik via + szetszwo) + + HDFS-446. Improvements to Offline Image Viewer. (Jakob Homan via shv) + + HADOOP-6160. Fix releaseaudit target to run on specific directories. + (gkesavan) + + HDFS-501. Use enum to define the constants in DataTransferProtocol. + (szetszwo) + + HDFS-508. Factor out BlockInfo from BlocksMap. (shv) + + HDFS-510. Rename DatanodeBlockInfo to be ReplicaInfo. + (Jakob Homan & Hairong Kuang via shv) + + HDFS-500. Deprecate NameNode methods deprecated in NameNodeProtocol. + (Jakob Homan via shv) + + HDFS-514. Change DFSClient.namenode from public to private. (Bill Zeller + via szetszwo) + + HDFS-496. Use PureJavaCrc32 in HDFS. (Todd Lipcon via szetszwo) + + HDFS-511. Remove redundant block searches in BlockManager. (shv) + + HDFS-504. Update the modification time of a file when the file + is closed. (Chun Zhang via dhruba) + + HDFS-498. Add development guide and documentation for the fault injection + framework. (Konstantin Boudnik via szetszwo) + + HDFS-524. Further DataTransferProtocol code refactoring. (szetszwo) + + HDFS-529. Use BlockInfo instead of Block to avoid redundant block searches + in BlockManager. (shv) + + HDFS-530. Refactor TestFileAppend* to remove code duplication. + (Konstantin Boudnik via szetszwo) + + HDFS-451. Add fault injection tests for DataTransferProtocol. (szetszwo) + + HDFS-409. Add more access token tests. (Kan Zhang via szetszwo) + + HDFS-546. DatanodeDescriptor iterates blocks as BlockInfo. (shv) + + HDFS-457. Do not shutdown datanode if some, but not all, volumes fail. + (Boris Shkolnik via szetszwo) + + HDFS-548. TestFsck takes nearly 10 minutes to run. (hairong) + + HDFS-539. Refactor fault injeciton pipeline test util for future reuse. + (Konstantin Boudnik via szetszwo) + + HDFS-552. Change TestFiDataTransferProtocol to junit 4 and add a few new + tests. (szetszwo) + + HDFS-563. Simplify the codes in FSNamesystem.getBlockLocations(..). + (szetszwo) + + HDFS-581. Introduce an iterator over blocks in the block report array.(shv) + + HDFS-549. 
Add a new target, run-with-fault-inject-testcaseonly, which + allows an execution of non-FI tests in FI-enable environment. (Konstantin + Boudnik via szetszwo) + + HDFS-173. Namenode will not block until a large directory deletion + completes. It allows other operations when the deletion is in progress. + (suresh) + + HDFS-551. Create new functional test for a block report. (Konstantin + Boudnik via hairong) + + HDFS-288. Redundant computation in hashCode() implementation. + (szetszwo via tomwhite) + + HDFS-412. Hadoop JMX usage makes Nagios monitoring impossible. + (Brian Bockelman via tomwhite) + + HDFS-472. Update hdfsproxy documentation. Adds a setup guide and design + document. (Zhiyong Zhang via cdouglas) + + HDFS-617. Support non-recursive create(). (Kan Zhang via szetszwo) + + HDFS-618. Support non-recursive mkdir(). (Kan Zhang via szetszwo) + + HDFS-574. Split the documentation between the subprojects. + (Corinne Chandel via omalley) + + HDFS-598. Eclipse launch task for HDFS. (Eli Collins via tomwhite) + + HDFS-641. Move all of the components that depend on map/reduce to + map/reduce. (omalley) + + HDFS-509. Redesign DataNode volumeMap to include all types of Replicas. + (hairong) + + HDFS-562. Add a test for NameNode.getBlockLocations(..) to check read from + un-closed file. (szetszwo) + + HDFS-543. Break FSDatasetInterface#writToBlock() into writeToRemporary, + writeToRBW, ad append. (hairong) + + HDFS-603. Add a new interface, Replica, which is going to replace the use + of Block in datanode. (szetszwo) + + HDFS-589. Change block write protocol to support pipeline recovery. + (hairong) + + HDFS-652. Replace BlockInfo.isUnderConstruction() with isComplete() (shv) + + HDFS-648. Change some methods in AppendTestUtil to public. (Konstantin + Boudnik via szetszwo) + + HDFS-662. Unnecessary info message from DFSClient. (hairong) + + HDFS-518. Create new tests for Append's hflush. (Konstantin Boudnik + via szetszwo) + + HDFS-688. Add configuration resources to DFSAdmin. (shv) + + HDFS-29. Validate the consistency of the lengths of replica and its file + in replica recovery. (szetszwo) + + HDFS-680. Add new access method to a copy of a block's replica. (shv) + + HDFS-704. Unify build property names to facilitate cross-projects + modifications (cos) + + HDFS-705. Create an adapter to access some of package-private methods of + DataNode from tests (cos) + + HDFS-710. Add actions with constraints to the pipeline fault injection + tests and change SleepAction to support uniform random sleeping over an + interval. (szetszwo) + + HDFS-713. Need to properly check the type of the test class from an aspect + (cos) + + HDFS-716. Define a pointcut for pipeline close and add a few fault + injection tests to simulate out of memory problem. (szetszwo) + + HDFS-719. Add 6 fault injection tests for pipeline close to simulate slow + datanodes and disk errors. (szetszwo) + + HDFS-616. Create functional tests for new design of the block report. (cos) + + HDFS-584. Fail the fault-inject build if any advices are mis-bound. (cos) + + HDFS-730. Add 4 fault injection tests to simulate non-responsive datanode + and out-of-memory problem for pipeline close ack. (szetszwo) + + HDFS-728. Create a comprehensive functional test for append. (hairong) + + HDFS-736. commitBlockSynchronization() updates block GS and length + in-place. (shv) + + HADOOP-5107. Use Maven ant tasks to publish the subproject jars. + (Giridharan Kesavan via omalley) + + HDFS-521. Create new tests for pipeline (cos) + + HDFS-764. 
Places the Block Access token implementation in hdfs project. + (Kan Zhang via ddas) + + HDFS-787. Upgrade some libraries to be consistent with common and + mapreduce. (omalley) + + HDFS-519. Create new tests for lease recovery (cos) + + HDFS-804. New unit tests for concurrent lease recovery (cos) + + HDFS-813. Enable the append test in TestReadWhileWriting. (szetszwo) + + HDFS-145. Cleanup inconsistent block length handling code in + FSNameSystem#addStoredBlock. (hairong) + + HDFS-127. Reset failure count in DFSClient for each block acquiring + operation. (Igor Bolotin via szetszwo) + + HDFS-520. Create new tests for block recovery. (hairong) + + HDFS-1067. Create block recovery tests that handle errors. (hairong) + + HDFS-1107. Turn on append by default. (shv) + + HDFS-968. Use StringBuilder instead of StringBuffer for better + performance. (Kay Kay via suresh) + + HDFS-703. Replace current fault injection implementation with one + from (cos) + + HDFS-754. Reduce ivy console output to observable level (cos) + + HDFS-832. HDFS side of HADOOP-6222. (cos) + + HDFS-840. Change tests to use FileContext test helper introduced in + HADOOP-6394. (Jitendra Nath Pandey via suresh) + + HDFS-685. Use the user-to-groups mapping service in the NameNode. + (boryas, acmurthy) + + HDFS-755. Read multiple checksum chunks at once in DFSInputStream. + (Todd Lipcon via tomwhite) + + HDFS-786. Implement getContentSummary in HftpFileSystem. + (Tsz Wo (Nicholas), SZE via cdouglas) + + HDFS-587. Add support for specifying queue name in mapreduce tests. + (Erik Steffl via suresh) + + HDFS-902 Move contrib/raid to MapReduce. (Eli Collins via omalley) + + HDFS-800. The last block of a file under construction may change to the + COMPLETE state in response to getAdditionalBlock or completeFileInternal. + (hairong) + + HDFS-899. Delegation Token Implementation + and corresponding changes in Namenode and DFS Api to issue, + renew and cancel delegation tokens. (jnp via boryas) + + HDFS-844. Log the filename when file locking fails. (tomwhite) + + HDFS-914. Refactor DFSOutputStream and DFSInputStream out of DFSClient. + (Todd Lipcon via tomwhite) + + HDFS-949. Move DelegationToken into Common so that it can be used by + MapReduce. (omalley) + + HDFS-930. Better error message for DATA_TRANSFER_VERSION mismatched. + (Kay Kay via szetszwo) + + HDFS-986. Delegation token renewing and cancelling should provide + meaningful exceptions when there are failures instead of returning + false. (omalley) + + HADOOP-6579. Upgrade the commons-codec library to 1.4. (omalley) + + HDFS-991. Allow authentication to the web ui via a delegation token. + (omalley) + + HDFS-994. Allow fetching of delegation token from NameNode for hftp. + (Jakob Homan via acmurthy) + + HDFS-998. Quote blocks streamed through jsps. (cdouglas) + + HDFS-729. NameNode API to list files that have missing blocks. + (Rodrigo Schmidt via dhruba) + + HDFS-850. The WebUI display more details about namenode memory usage. + (Dmytro Molkov via dhruba) + + HDFS-826. The DFSOutputStream has a API that returns the number of + active datanode(s) in the current pipeline. (dhruba) + + HDFS-985. HDFS should issue multiple RPCs for listing a large + directory. (hairong) + + HDFS-1043. NNThroughputBenchmark modifications to support benchmarking of + server-side user group resolution. (shv) + + HDFS-892. Optionally use Avro reflection for Namenode RPC. This + is not a complete implementation yet, but rather a starting point. + (cutting) + + HDFS-854. 
Datanode should scan devices in parallel to generate + block report. (Dmytro Molkov via jhoman) + + HDFS-1032. fsck has an option to list corrupt files. + (Andre Oriai via dhruba) + + HDFS-1024. SecondaryNameNode verifies size of fsimage and edits file. + (Dmytro Molkov via dhruba) + + HDFS-1011. hdfsproxy: Improve log messages by restoring the previous + thread name. (Srikanth Sundarrajan via szetszwo) + + HDFS-997. Allow datanode storage directory permissions to be configurable. + (Luke Lu via cdouglas) + + HDFS-1012. hdfsproxy: Support for fully qualified HDFS path in addition to + simple unqualified path. (Srikanth Sundarrajan via szetszwo) + + HDFS-933. Namenode should issue a delegation token only for kerberos + authenticated clients.(jnp via boryas) + + HDFS-1087. Modify audit log to use a StringBuilder rather than a Formatter. + (cdouglas) + + HDFS-1083. Update TestHDFSCLI not to expect exception class name + in error messages. (suresh) + + HDFS-1099. Add test for umask backward compatibility. (suresh) + + HDFS-1092. Use logging rather than System.err in MiniDFSCluster. + (Kay Kay via jghoman) + + HDFS-1047. Install/deploy source jars to Maven repo. + (Patrick Angeles via jghoman) + + HDFS-666. Unit test for FsShell -text. (cdouglas via jghoman) + + HDFS-1054. Remove unnecessary sleep after failure in nextBlockOutputStream. + (Todd Lipcon via jghoman) + + HDFS-921. Convert TestDFSClientRetries::testNotYetReplicatedErrors + to Mockito. (jghoman) + + HDFS-1100. Override unwrapException in TestFcHdfsSymlink to test + symlink API conformance. (Eli Collins via suresh). + + HDFS-1089. Remove uses of FileContext#isFile, isDirectory, and exists. + (Eli Collins via hairong) + + HDFS-1028. Efficient splitting of path components reduces the time + to load in fsimage by 20%. (Dmytro Molkov via dhruba) + + HDFS-1109. HFTP supports filenames that contains the character "+". + (Dmytro Molkov via dhruba) + + HDFS-853. The HDFS webUI displays the balanced-ness of the cluster. + (Dmytro Molkov via dhruba) + + HDFS-1126. Change HDFS to depend on Hadoop 'common' artifacts instead + of 'core'. (tomwhite) + + HDFS-995. Replace usage of FileStatus#isDir(). (Eli Collins via + tomwhite) + + HDFS-1161. Make DN minimum valid volumes configurable. + (Eli Collins via tomwhite) + + HDFS-1181. Move configuration and script files post split. (tomwhite) + + HDFS-1170. Add more assertions to TestLargeDirectoryDelete. + (Steve Loughran via tomwhite) + + HDFS-1199. Extract a subset of tests for smoke (DOA) validation. (cos) + + HDFS-1174. New properties for suspend and resume process. (Vinay Thota via + cos) + + HDFS-1277. [Herriot] New property for multi user list. (Vinay Thota via + cos) + + HDFS-806. Add new unit tests to the 10-mins 'run-commit-test' target (cos) + + OPTIMIZATIONS + + HDFS-946. NameNode should not return full path name when lisitng a + diretory or getting the status of a file. (hairong) + + BUG FIXES + + HDFS-76. Better error message to users when commands fail because of + lack of quota. Allow quota to be set even if the limit is lower than + current consumption. (Boris Shkolnik via rangadi) + + HADOOP-4687. HDFS is split from Hadoop Core. It is a subproject under + Hadoop (Owen O'Malley) + + HADOOP-6096. Fix Eclipse project and classpath files following project + split. (tomwhite) + + HDFS-195. Handle expired tokens when write pipeline is reestablished. + (Kan Zhang via rangadi) + + HDFS-181. Validate src path in FSNamesystem.getFileInfo(..). (Todd + Lipcon via szetszwo) + + HDFS-441. 
Remove TestFTPFileSystem. (szetszwo) + + HDFS-440. Fix javadoc broken links in DFSClient. (szetszwo) + + HDFS-480. Fix a typo in the jar name in build.xml. + (Konstantin Shvachko via gkesavan) + + HDFS-438. Check for NULL before invoking GenericArgumentParser in + DataNode. (Raghu Angadi) + + HDFS-415. BlockReceiver hangs in case of certain runtime exceptions. + (Konstantin Boudnik via rangadi) + + HDFS-462. loadFSImage should close edits file. (Jakob Homan via shv) + + HDFS-489. Update TestHDFSCLI for the -skipTrash option in rm. (Jakob Homan + via szetszwo) + + HDFS-445. pread() does not pick up changes to block locations. + (Kan Zhang via rangadi) + + HDFS-463. CreateEditLog utility broken after HDFS-396 (URI for + FSImage). (Suresh Srinivas via rangadi) + + HDFS-484. Fix bin-package and package target to package jar files. + (gkesavan) + + HDFS-490. Eliminate the deprecated warnings introduced by H-5438. + (He Yongqiang via szetszwo) + + HDFS-119. Fix a bug in logSync(), which causes NameNode block forever. + (Suresh Srinivas via shv) + + HDFS-534. Include avro in ivy. (szetszwo) + + HDFS-532. Allow applications to know that a read request failed + because block is missing. (dhruba) + + HDFS-561. Fix write pipeline READ_TIMEOUT in DataTransferProtocol. + (Kan Zhang via szetszwo) + + HDFS-553. BlockSender reports wrong failed position in ChecksumException. + (hairong) + + HDFS-568. Set mapred.job.tracker.retire.jobs to false in + src/test/mapred-site.xml for mapreduce tests to run. (Amareshwari + Sriramadasu via szetszwo) + + HDFS-15. All replicas end up on 1 rack. (Jitendra Nath Pandey via hairong) + + HDFS-586. TestBlocksWithNotEnoughRacks sometimes fails. + (Jitendra Nath Pandey via hairong) + + HADOOP-6243. Fixed a NullPointerException in handling deprecated keys. + (Sreekanth Ramakrishnan via yhemanth) + + HDFS-605. Do not run fault injection tests in the run-test-hdfs-with-mr + target. (Konstantin Boudnik via szetszwo) + + HDFS-606. Fix ConcurrentModificationException in invalidateCorruptReplicas() + (shv) + + HDFS-601. TestBlockReport obtains data directories directly from + MiniHDFSCluster. (Konstantin Boudnik via shv) + + HDFS-614. TestDatanodeBlockScanner obtains data directories directly from + MiniHDFSCluster. (shv) + + HDFS-612. Remove the use of org.mortbay.log.Log in FSDataset. (szetszwo) + + HDFS-622. checkMinReplication should count live nodes only. (shv) + + HDFS-629. Remove ReplicationTargetChooser.java along with fixing + import warnings generated by Eclipse. (dhruba) + + HDFS-637. DataNode sends a Success ack when block write fails. (hairong) + + HDFS-640. Fixed TestHDFSFileContextMainOperations.java build failure. (suresh) + + HDFS-547. TestHDFSFileSystemContract#testOutputStreamClosedTwice + sometimes fails with CloseByInterruptException. (hairong) + + HDFS-588. Fix TestFiDataTransferProtocol and TestAppend2 failures. (shv) + + HDFS-550. DataNode restarts may introduce corrupt/duplicated/lost replicas + when handling detached replicas. (hairong) + + HDFS-659. If the the last block is not complete, update its length with + one of its replica's length stored in datanode. (szetszwo) + + HDFS-649. Check null pointers for DataTransferTest. (Konstantin Boudnik + via szetszwo) + + HDFS-661. DataNode upgrade fails on non-existant current directory. + (hairong) + + HDFS-597. Mofication introduced by HDFS-537 breakes an advice binding in + FSDatasetAspects. (Konstantin Boudnik via szetszwo) + + HDFS-665. TestFileAppend2 sometimes hangs. (hairong) + + HDFS-676. 
Fix NPE in FSDataset.updateReplicaUnderRecovery() (shv) + + HDFS-673. BlockReceiver#PacketResponder should not remove a packet from + the ack queue before its ack is sent. (hairong) + + HDFS-682. Fix bugs in TestBlockUnderConstruction. (szetszwo) + + HDFS-668. TestFileAppend3#TC7 sometimes hangs. (hairong) + + HDFS-679. Appending to a partial chunk incorrectly assumes the + first packet fills up the partial chunk. (hairong) + + HDFS-722. Fix callCreateBlockWriteStream pointcut in FSDatasetAspects. + (szetszwo) + + HDFS-690. TestAppend2#testComplexAppend failed on "Too many open files". + (hairong) + + HDFS-725. Support the build error fix for HADOOP-6327. (Sanjay Radia via + szetszwo) + + HDFS-625. Fix NullPointerException thrown from ListPathServlet. (suresh) + + HDFS-735. TestReadWhileWriting has wrong line termination symbols (cos) + + HDFS-691. Fix an overflow error in DFSClient.DFSInputStream.available(). + (szetszwo) + + HDFS-733. TestBlockReport fails intermittently. (cos) + + HDFS-774. Intermittent race condition in TestFiPipelines (cos) + + HDFS-741. TestHFlush test doesn't seek() past previously written part of + the file (cos, szetszwo) + + HDFS-706. Intermittent failures in TestFiHFlush (cos) + + HDFS-646. Fix test-patch failure by adding test-contrib ant target. + (gkesavan) + + HDFS-791. Build is broken after HDFS-787 patch has been applied (cos) + + HDFS-792. TestHDFSCLI is failing. (Todd Lipcon via cos) + + HDFS-781. Namenode metrics PendingDeletionBlocks is not decremented. + (Suresh) + + HDFS-192. Fix TestBackupNode failures. (shv) + + HDFS-797. TestHDFSCLI much slower after HDFS-265 merge. (Todd Lipcon via cos) + + HDFS-824. Stop lease checker in TestReadWhileWriting. (szetszwo) + + HDFS-823. CheckPointer should use addInternalServlet for image-fetching + servlet (jghoman) + + HDFS-456. Fix URI generation for windows file paths. (shv) + + HDFS-812. FSNamesystem#internalReleaseLease throws NullPointerException on + a single-block file's lease recovery. (cos) + + HDFS-724. Pipeline hangs if one of the block receiver is not responsive. + (hairong) + + HDFS-564. Adding pipeline tests 17-35. (hairong) + + HDFS-849. TestFiDataTransferProtocol2#pipeline_Fi_18 sometimes fails. + (hairong) + + HDFS-762. Balancer causes Null Pointer Exception. + (Cristian Ivascu via dhruba) + + HDFS-868. Fix link to Hadoop Upgrade Wiki. (Chris A. Mattmann via shv) + + HDFS-880. TestNNLeaseRecovery fails on windows (cos, shv) + + HDFS-699. Primary datanode should compare replicas' on disk lengths. + (hairong) + + HDFS-897. Fix a bug related to generation stamp comparison in + ReplicasMap. (suresh) + + HDFS-793. Data node should receive the whole packet ack message before it + constructs and sends its own ack message for the packet. (hairong) + + HDFS-101. DFS write pipeline: DFSClient sometimes does not detect second + datanode failure. (hairong) + + HDFS-822. Appends to already-finalized blocks can rename across volumes. + (hairong) + + HDFS-1046. Fix Tomcat version in hdfsproxy/build.xml. (Srikanth + Sundarrajan via szetszwo) + + HDFS-1072. Fix TestReadWhileWriting failure. (Erik Steffl via shv) + + HDFS-913. Rename fault injection test TestRename.java to TestFiRename.java + to include it in tests run by ant target run-test-hdfs-fault-inject. + (suresh) + + HDFS-695. RaidNode should read in configuration from hdfs-site.xml. + (dhruba) + + HDFS-726. Eclipse .classpath template has outdated jar files and is + missing some new ones. (cos) + + HDFS-750. Fix build failure due to TestRename. 
(suresh) + + HDFS-712. Move libhdfs from mapreduce subproject to hdfs subproject. + (Eli Collins via dhruba) + + HDFS-757. Enable Unit test for HDFS Raid. (dhruba) + + HDFS-611. Prevent DataNode heartbeat times from increasing even when + the DataNode has many blocks to delete. (Zheng Shao via dhruba) + + HDFS-751. Fix TestCrcCorruption to pick up the correct datablocks to + corrupt. (dhruba) + + HDFS-763. Fix slightly misleading report from DataBlockScanner + about corrupted scans. (dhruba) + + HDFS-727. bug setting block size hdfsOpenFile (Eli Collins via cos) + + HDFS-756. libhdfs unit tests do not run. (Eli Collins via cos) + + HDFS-783. libhdfs tests brakes code coverage runs with Clover (cos) + + HDFS-785. Add Apache license to several namenode unit tests. + (Ravi Phulari via jghoman) + + HDFS-802. Update Eclipse configuration to match changes to Ivy + configuration (Edwin Chan via cos) + + HDFS-423. Unbreak FUSE build and fuse_dfs_wrapper.sh (Eli Collins via cos) + + HDFS-825. Build fails to pull latest hadoop-core-* artifacts (cos) + + HDFS-94. The Heap Size printed in the NameNode WebUI is accurate. + (Dmytro Molkov via dhruba) + + HDFS-767. An improved retry policy when the DFSClient is unable to fetch a + block from the datanode. (Ning Zhang via dhruba) + + HDFS-775. FSDataset calls getCapacity() twice. (stevel) + + HDFS-885. Datanode toString() NPEs on null dnRegistration. (stevel) + + HDFS-877. Client-driven block verification not functioning. (Todd + Lipcon via hairong) + + HDFS-630. In DFSOutputStream.nextBlockOutputStream(), the client can + exclude specific datanodes when locating the next block. + (Cosmin Lehene via Stack) + + HDFS-922. Remove unnecessary semicolon added by HDFS-877 that causes + problems for Eclipse compilation. (jghoman) + + HDFS-927 DFSInputStream retries too many times for new block locations + (Todd Lipcon via Stack) + + HDFS-938. Replace calls to UGI.getUserName() with UGI.getShortUserName() + (jghoman) + + HDFS-894. DatanodeID.ipcPort is not updated when existing node + re-registers. (Todd Lipcon via tomwhite) + + HDFS-965. Split TestDelegationToken in to two parts and fix configuration + to allow proxy users in the test. (Jitendra Pandey via omalley) + + HDFS-999. Secondary namenode should login using kerberos if security is + configured (boryas) + + HDFS-856. Hardcoded replication level for new files in fuse-dfs. + (Brian Bockelman via tomwhite) + + HDFS-857. Incorrect type for fuse-dfs capacity can cause "df" to return + negative values on 32-bit machines. (Brian Bockelman via tomwhite) + + HDFS-858. Incorrect return codes for fuse-dfs. (Brian Bockelman via + tomwhite) + + HDFS-859. fuse-dfs utime behavior causes issues with tar. + (Brian Bockelman via tomwhite) + + HDFS-861. fuse-dfs does not support O_RDWR. (Brian Bockelman via tomwhite) + + HDFS-961. dfs_readdir incorrectly parses paths. (Eli Collins via tomwhite) + + HDFS-1015. Fix intermittent failure in TestSecurityTokenEditLog. + (Jitendra Nath Pandey via suresh) + + HDFS-939. libhdfs test is broken. (Eli Collins via tomwhite) + + HDFS-1074. hdfsproxy: Fix bugs in TestProxyUtil. (Srikanth Sundarrajan + via szetszwo) + + HDFS-481. hdfsproxy: Bug Fixes + HdfsProxy to use proxy user to + impresonate the real user. (Srikanth Sundarrajan via szetszwo) + + HDFS-482. Move HsftpFileSystem's ssl.client.do.not.authenticate.server + configuration setting to ssl-client.xml. (Srikanth Sundarrajan via + szetszwo) + + HDFS-1010. 
hdfsproxy: Retrieve groups from UnixUserGroupInformation + instead of LdapEntry. (Srikanth Sundarrajan via szetszwo) + + HDFS-466. hdfs_write infinite loop when dfs fails and cannot write + files > 2 GB. (Pete Wyckoff via tomwhite) + + HDFS-651. HDFS Docs - fix listing of docs in the doc menu. + (Corinne Chandel via tomwhite) + + HDFS-1014. Error in reading delegation tokens from edit logs. + (Jitendra Nath Pandey via jhoman) + + HDFS-1088. Prevent renaming a symbolik link to its target. + (Eli Collins via suresh) + + HDFS-966. NameNode does not recovers lease when it is in safemode. + (dhruba) + + HDFS-833. Datanode shutdown should log problems with Storage.unlockAll() + (Steve Loughran via dhruba) + + HDFS-1101. TestDiskError.testLocalDirs() fails. (cdouglas via jghoman) + + HDFS-1031. Enhance the webUi to list a few of the corrupted files in HDFS. + (Andre Orian via dhruba) + + HDFS-1078. Create static and dynamic versions of libhdfs. + (Sam Rash via dhruba) + + HDFS-1104. Fsck triggers full GC on NameNode. (hairong) + + HDFS-1141. Closing a file is successful only if the client still has a + valid lease. (Todd Lipcon via dhruba) + + HDFS-1138. Prevent erroneous updation of modification time of a directory + when fsimage loads. (Dmytro Molkov via dhruba) + + HDFS-1000. Updates libhdfs to the new API for UGI (ddas) + + HDFS-609. Create a file with the append flag does not work in HDFS. + (tomwhite) + + HDFS-1255. Fix failing test-libhdfs.sh test. (tomwhite) + + HDFS-1256. libhdfs is missing from the tarball. (tomwhite) + + HDFS-1057. Concurrent readers hit ChecksumExceptions if following a + writer to very end of file. (sam rash via hairong) + + HDFS-1212. Harmonize HDFS JAR library versions with Common. (tomwhite) + + HDFS-1159. clean-cache target removes wrong ivy cache (cos) + + HDFS-1193. -mvn-system-deploy target is broken which inturn fails the + mvn-deploy task leading to unstable mapreduce build (Giridharan + Kesavan via cos) + + HDFS-1299. 'compile-fault-inject' never should be called directly. (cos) + + HDFS-1311. Running tests with 'testcase' cause triple execution of the + same test case (Cos) + + HDFS-1267. fuse-dfs does not compile. (Devaraj Das via tomwhite) + + HDFS-1598. Directory listing on hftp:// does not show .*.crc files. + (szetszwo) + + HDFS-1750. ListPathsServlet should not use HdfsFileStatus.getLocalName() + to get file name since it may return an empty string. (szetszwo) + +Release 0.20.3 - Unreleased + + IMPROVEMENTS + + BUG FIXES + + HDFS-1041. DFSClient.getFileChecksum(..) should retry if connection to + the first datanode fails. (szetszwo) + + HDFS-909. Wait until edits syncing is finishes before purging edits. + (Todd Lipcon via shv) + + HDFS-1258. Clearing namespace quota on "/" corrupts fs image. + (Aaron T. Myers via szetszwo) + + HDFS-1406. TestCLI fails on Ubuntu with default /etc/hosts. (cos) + +Release 0.20.203.0 - 2011-5-11 + + IMPROVEMENTS + + HADOOP-7259. Contrib modules should include the build.properties from + the enclosing hadoop directory. (omalley) + + BUG FIXES + + HDFS-132. Fix namenode to not report files deleted metrics for deletions + done while replaying edits during startup. (suresh & shv) + + HDFS-955. New implementation of saveNamespace() to avoid loss of edits + when name-node fails during saving. (shv) + +Release 0.20.2 - 2009-09-01 + + IMPROVEMENTS + + HDFS-737. Add full path name of the file to the block information and + summary of total number of files, blocks, live and deadnodes to + metasave output. 
(Jitendra Nath Pandey via suresh) + + HDFS-919. Create test to validate the BlocksVerified metric (Gary Murry + via cos) + + HDFS-907. Add tests for getBlockLocations and totalLoad metrics. + (Ravi Phulari via cos) + + BUG FIXES + + HDFS-686. NullPointerException is thrown while merging edit log and image. + (hairong) + + HDFS-677. Rename failure when both source and destination quota exceeds + results in deletion of source. (suresh) + + HDFS-709. Fix TestDFSShell failure due to rename bug introduced by + HDFS-677. (suresh) + + HDFS-579. Fix DfsTask to follow the semantics of 0.19, regarding non-zero + return values as failures. (Christian Kunz via cdouglas) + + HDFS-723. Fix deadlock in DFSClient#DFSOutputStream. (hairong) + + HDFS-596. Fix memory leak in hdfsFreeFileInfo() for libhdfs. + (Zhang Bingjun via dhruba) + + HDFS-185. Disallow chown, chgrp, chmod, setQuota, and setSpaceQuota when + name-node is in safemode. (Ravi Phulari via shv) + + HDFS-187. Initialize secondary namenode http address in TestStartup. + (Todd Lipcon via szetszwo) + + HDFS-464. Fix memory leaks in libhdfs. (Christian Kunz via suresh) + + HDFS-1377. Quota bug for partial blocks allows quotas to be violated. (eli) + +Release 0.20.1 - 2009-09-01 + + IMPROVEMENTS + + HDFS-438. Improve help message for space quota command. (Raghu Angadi) + + BUG FIXES + + HDFS-167. Fix a bug in DFSClient that caused infinite retries on write. + (Bill Zeller via szetszwo) + + HDFS-527. Remove/deprecate unnecessary DFSClient constructors. (szetszwo) + + HDFS-525. The SimpleDateFormat object in ListPathsServlet is not thread + safe. (Suresh Srinivas and cdouglas) + + HDFS-761. Fix failure to process rename operation from edits log due to + quota verification. (suresh) diff --git a/aarch64/share/doc/hadoop/hdfs/LICENSE.txt b/aarch64/share/doc/hadoop/hdfs/LICENSE.txt new file mode 100644 index 0000000..9660123 --- /dev/null +++ b/aarch64/share/doc/hadoop/hdfs/LICENSE.txt @@ -0,0 +1,271 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +For src/main/native/util/tree.h: + +/*- + * Copyright 2002 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ diff --git a/aarch64/share/doc/hadoop/hdfs/NOTICE.txt b/aarch64/share/doc/hadoop/hdfs/NOTICE.txt new file mode 100644 index 0000000..62fc581 --- /dev/null +++ b/aarch64/share/doc/hadoop/hdfs/NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). diff --git a/aarch64/share/doc/hadoop/mapreduce/CHANGES.txt b/aarch64/share/doc/hadoop/mapreduce/CHANGES.txt new file mode 100644 index 0000000..2c328ce --- /dev/null +++ b/aarch64/share/doc/hadoop/mapreduce/CHANGES.txt @@ -0,0 +1,6904 @@ +Hadoop MapReduce Change Log + +Release 2.2.0 - 2013-10-13 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + MAPREDUCE-5504. mapred queue -info inconsistent with types (Kousuke Saruta + via tgraves) + + MAPREDUCE-5488. Changed MR client to keep trying to reach the application + when it sees that on attempt's AM is down. (Jian He via vinodkv) + + MAPREDUCE-5515. 
Fixed MR AM's webapp to depend on a new config + mapreduce.ssl.enabled to enable https and disabling it by default as MR AM + needs to set up its own certificates etc and not depend on clusters'. + (Omkar Vinit Joshi via vinodkv) + + MAPREDUCE-5505. Clients should be notified job finished only after job + successfully unregistered (Zhijie Shen via bikas) + + MAPREDUCE-5503. Fixed a test issue in TestMRJobClient. (Jian He via vinodkv) + + MAPREDUCE-5170. Fixed a wrong log message in CombineFileInputFormat class. + (Sangjin Lee via vinodkv) + + MAPREDUCE-5525. Increase timeout of TestDFSIO.testAppend and + TestMRJobsWithHistoryService.testJobHistoryData. (Chuan Liu via cnauroth) + + MAPREDUCE-5513. ConcurrentModificationException in JobControl (Robert + Parker via jlowe) + + MAPREDUCE-5531. Fix compat with hadoop-1 in mapreduce.(TaskID, + TaskAttemptID) by re-introducing missing constructors. (Robert Kanter via + acmurthy) + + MAPREDUCE-5545. org.apache.hadoop.mapred.TestTaskAttemptListenerImpl.testCommitWindow + times out (Robert Kanter via jlowe) + + MAPREDUCE-5529. Fix compat with hadoop-1 in mapred.TotalOrderPartitioner + by re-introducing (get,set)PartitionFile which takes in JobConf. (Robert + Kanter via acmurthy) + + MAPREDUCE-5538. Fixed MR AppMaster to send job-notification URL only after + the job is really done - a bug caused by MAPREDUCE-5505. (Zhijie Shen via + vinodkv) + + MAPREDUCE-5551. Fix compat with hadoop-1 in + SequenceFileAsBinaryOutputFormat.WritableValueBytes by re-introducing + missing constructors. (Zhijie Shen via acmurthy) + + MAPREDUCE-5544. JobClient#getJob loads job conf twice. (Sandy Ryza) + + MAPREDUCE-5536. Fixed MR AM and JHS to respect + mapreduce.jobhistory.webapp.https.address. (Omkar Vinit Joshi via vinodkv) + + MAPREDUCE-5530. Fix compat with hadoop-1 in + mapred.lib.CombinFileInputFormat by re-introducing + isSplittable(FileSystem, Path) api and ensuring semantic compatibility. + (Robert Kanter via acmurthy) + + MAPREDUCE-5459. Update documentation on how to run MRv1 examples on YARN. + (Zhijie Shen via acmurthy) + + MAPREDUCE-5554. hdfs-site.xml included in hadoop-mapreduce-client-jobclient + tests jar is breaking tests for downstream components (Robert Kanter via + Sandy Ryza) + + MAPREDUCE-5489. MR jobs hangs as it does not use the node-blacklisting + feature in RM requests (Zhijie Shen via bikas) + + MAPREDUCE-5442. $HADOOP_MAPRED_HOME/$HADOOP_CONF_DIR setting not working on + Windows. (Yingda Chen via cnauroth) + + MAPREDUCE-5533. Fixed MR speculation code to track any TaskAttempts that + aren't heart-beating for a while, so that we can aggressively speculate + instead of waiting for task-timeout (Xuan Gong via vinodkv) + + MAPREDUCE-5562. Fixed MR App Master to perform pending tasks like staging-dir + cleanup, sending job-end notification correctly when unregister with RM + fails. (Zhijie Shen via vinodkv) + +Release 2.1.1-beta - 2013-09-23 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + MAPREDUCE-5478. TeraInputFormat unnecessarily defines its own FileSplit + subclass (Sandy Ryza) + + MAPREDUCE-5497. Changed MRAppMaster to sleep only after doing everything else + but just before ClientService to avoid race conditions during RM restart. + (Jian He via vinodkv) + + MAPREDUCE-5379. Include token tracking ids in jobconf. (kkambatl via tucu) + + MAPREDUCE-5523. Added separate configuration properties for https for JHS + without which even when https is enabled, it starts on http port itself. 
+ (Omkar Vinit Joshi via vinodkv) + + OPTIMIZATIONS + + MAPREDUCE-5446. TestJobHistoryEvents and TestJobHistoryParsing have race + conditions (jlowe via kihwal) + + MAPREDUCE-5462. In map-side sort, swap entire meta entries instead of + indexes for better cache performance. (Sandy Ryza) + + MAPREDUCE-1981. Improve getSplits performance by using listLocatedStatus + (Hairong Kuang and Jason Lowe via jlowe) + + BUG FIXES + + MAPREDUCE-5385. Fixed a bug with JobContext getCacheFiles API. (Omkar Vinit + Joshi via vinodkv) + + MAPREDUCE-5428. HistoryFileManager doesn't stop threads when service is + stopped (Karthik Kambatla via jlowe) + + MAPREDUCE-5251. Reducer should not implicate map attempt if it has + insufficient space to fetch map output (Ashwin Shankar via jlowe) + + MAPREDUCE-5317. Stale files left behind for failed jobs (Ravi Prakash via + jlowe) + + MAPREDUCE-5358. MRAppMaster throws invalid transitions for JobImpl + (Devaraj K via jlowe) + + MAPREDUCE-3193. FileInputFormat doesn't read files recursively in the + input path dir (Devaraj K via jlowe) + + MAPREDUCE-5440. TestCopyCommitter Fails on JDK7 (Robert Parker via jlowe) + + MAPREDUCE-5367. Local jobs all use same local working directory + (Sandy Ryza) + + MAPREDUCE-5425. Junit in TestJobHistoryServer failing in jdk 7 (Robert + Parker via jlowe) + + MAPREDUCE-5454. TestDFSIO fails intermittently on JDK7 (Karthik Kambatla + via Sandy Ryza) + + MAPREDUCE-5001. LocalJobRunner has race condition resulting in job + failures (Sandy Ryza via jlowe) + + MAPREDUCE-5466. Changed MR AM to not promote history files of intermediate + AMs in case they are exiting because of errors and thus help history-server + pick up the right history file for the last successful AM. (Jian He via + vinodkv) + + MAPREDUCE-5468. Fix MR AM recovery for map-only jobs. (vinodkv via + acmurthy) + + MAPREDUCE-5470. LocalJobRunner does not work on Windows. (Sandy Ryza via + cnauroth) + + MAPREDUCE-5476. Changed MR AM recovery code to cleanup staging-directory + only after unregistering from the RM. (Jian He via vinodkv) + + MAPREDUCE-5483. revert MAPREDUCE-5357. (rkanter via tucu) + + MAPREDUCE-5441. Changed MR AM to return RUNNING state if exiting when RM + commands to reboot, so that client can continue to track the overall job. + (Jian He via vinodkv) + + MAPREDUCE-5475. MRClientService does not verify ACLs properly (jlowe) + + MAPREDUCE-5414. TestTaskAttempt fails in JDK7 with NPE (Nemon Lou via + devaraj) + + MAPREDUCE-5020. Compile failure with JDK8 (Trevor Robinson via tgraves) + + MAPREDUCE-5164. mapred job and queue commands omit HADOOP_CLIENT_OPTS + (Nemon Lou via devaraj) + + MAPREDUCE-5493. Cleanup in-memory & on-disk segments to prevent leak on + shuffle completion. (jlowe via acmurthy) + +Release 2.1.0-beta - 2013-08-22 + + INCOMPATIBLE CHANGES + + MAPREDUCE-4067. Changed MRClientProtocol api to throw IOException only (Xuan + Gong via vinodkv) + + MAPREDUCE-5234. Change mapred.TaskReport and mapreduce.TaskReport for binary + compatibility with mapred in 1.x but incompatible with 0.23.x. (Mayank Bansal + via vinodkv) + + MAPREDUCE-5156. Change hadoop examples ProgramDriver to be able to run + 1.x examples jar on top of YARN. This change breaks 0.23.x direct usages of + ProgramDriver. (Zhijie Shen via vinodkv) + + MAPREDUCE-5233. Add methods that are changed or removed from JobControl.Job + when compared to 1.x. This breaks 0.23.x users of one API in Job. (Mayank + Bansal via vinodkv) + + MAPREDUCE-5237. 
Add methods that were removed from ClusterStatus back into + 2.x so as to be compatible with 1.x. Incompatible as + ClusterStatus.UNINITIALIZED_MEMORY_VALUE is a long now and so breaks 0.23.x + but it shouldn't be a big deal in reality. (Zhijie Shen via vinodkv) + + NEW FEATURES + + HADOOP-8562. Enhancements to support Hadoop on Windows Server and Windows + Azure environments. (See breakdown of tasks below for subtasks and + contributors) + + IMPROVEMENTS + + MAPREDUCE-3008. Improvements to cumulative CPU emulation for short running + tasks in Gridmix. (amarrk via tgraves) + + MAPREDUCE-5033. mapred shell script should respect usage flags + (--help -help -h). (Andrew Wang via atm) + + MAPREDUCE-4892. Modify CombineFileInputFormat to not skew input slits' + allocation on small clusters. (Bikas Saha via vinodkv) + + MAPREDUCE-4990. Construct debug strings conditionally in + ShuffleHandler.Shuffle#sendMapOutput(). (kkambatl via tucu) + + MAPREDUCE-4875. coverage fixing for org.apache.hadoop.mapred + (Aleksey Gorshkov via bobby) + + MAPREDUCE-5129. Allow tags to JobHistory for deeper analytics. (billie via + acmurthy) + + MAPREDUCE-3787. [Gridmix] Optimize job monitoring and STRESS mode for + faster job submission. (amarrk via tgraves) + + MAPREDUCE-5079. Changes job recovery to restore state directly from job + history, instaed of simulating state machine events. + (Jason Lowe and Robert Parker via sseth) + + MAPREDUCE-4981. Add WordMean, WordMedian, WordStandardDeviation + to ExamplesDriver. (Plamen Jeliazkov via shv) + + MAPREUDUCE-5059. Change average merge time on Job overview page to be the + time delta between the end of the shuffle and the start of the reduce. + (Omkar Vinit Joshi via vinodkv) + + MAPREDUCE-4985. Add compression option to TestDFSIO usage. + (Plamen Jeliazkov via shv) + + MAPREDUCE-5152. Make MR App to simply pass through the container from RM + instead of extracting and populating information itself to start any + container. (vinodkv) + + MAPREDUCE-5175. Updated MR App to not set envs that will be set by NMs + anyways after YARN-561. (Xuan Gong via vinodkv) + + MAPREDUCE-5069. add concrete common implementations of + CombineFileInputFormat (Sangjin Lee via bobby) + + MAPREDUCE-5145. Changed default max-attempts to be more than one for MR jobs + inline with YARN. (Zhijie Shen via vinodkv) + + MAPREDUCE-5036. Default shuffle handler port should not be 8080. + (Sandy Ryza via tomwhite) + + MAPREDUCE-5159. Change ValueAggregatorJob to add APIs which can support + binary compatibility with hadoop-1 examples. (Zhijie Shen via vinodkv) + + MAPREDUCE-5157. Bring back old sampler related code so that we can support + binary compatibility with hadoop-1 sorter example. (Zhijie Shen via vinodkv) + + MAPREDUCE-5222. Bring back some methods and constants in Jobclient for + binary compatibility with mapred in 1.x. (Karthik Kambatla via vinodkv) + + MAPREDUCE-5235. Bring back old fields and exceptions in Counters for + binary compatibility with mapred in 1.x. (Mayank Bansal via vinodkv) + + MAPREDUCE-5246. Specify application-type at the time of job submission after + YARN-563. (Mayank Bansal via vinodkv) + + MAPREDUCE-5230. Bring back NLineInputFormat.createFileSplit for binary + compatibility with mapred in 1.x (Mayank Bansal via vinodkv) + + MAPREDUCE-5270. Migrated MR app from using BuilderUtil factory methods to + individual record factory methods. (Jian He via vinodkv) + + MAPREDUCE-5263. 
Bring back old methods and fields in + filecache.DistributedCache for binary compatibility with mapred in 1.x. + (Zhijie Shen via vinodkv) + + MAPREDUCE-5228. Bring back FileInputFormat.Counter and + FileOuputFormat.Counter for binary compatibility with 1.x mapred APIs. + (Mayank Bansal via vinodkv) + + MAPREDUCE-5176. Add annotation for tagging tasks as responsive to + preemption. (Carlo Curino, cdouglas) + + MAPREDUCE-5275. Bring back a couple of APIs in mapreduce.security.TokenCache + for binary compatibility with 1.x mapreduce APIs. (Mayank Bansal via vinodkv) + + MAPREDUCE-5231. Bring back a constructor in mapred's + DBInputFormat.DBRecordReader for binary compatibility with 1.x mapred APIs. + (Zhijie Shen via vinodkv) + + MAPREDUCE-5273. Bring back a couple of protected variables in mapred and + mapreduce CombineFileRecordReader for binary compatibility with 1.x APIs. + (Mayank Bansal via vinodkv) + + MAPREDUCE-5280. Bring back removed constructor and a method in mapreduce + ClusterMetrics for binary compatibility with 1.x APIs. (Mayank Bansal via + vinodkv) + + MAPREDUCE-5289. Updated MR App to use Token directly after YARN-717. (Jian He + via vinodkv) + + MAPREDUCE-5229. Put back FileOutputCommitter.TEMP_DIR_NAME in mapreduce for + binary compatibility with 1.x APIs. (Zhijie Shen via vinodkv) + + MAPREDUCE-5274. Bring back SecureShuffleUtils.toHex in mapreduce for binary + compatibility with 1.x APIs. (Mayank Bansal via vinodkv) + + MAPREDUCE-5300. Fix backward incompatibility for + o.a.h.mapreduce.filecache.DistributedCache. (Zhijie Shen via acmurthy) + + MAPREDUCE-5283. Over 10 different tests have near identical + implementations of AppContext (Sandy Ryza via jlowe) + + MAPREDUCE-5199. Removing ApplicationTokens file as it is no longer needed. + (Daryn Sharp via vinodkv) + + MAPREDUCE-5192. Allow for alternate resolutions of TaskCompletionEvents. + (cdouglas via acmurthy) + + MAPREDUCE-5184. Document compatibility for MapReduce applications in + hadoop-2 vis-a-vis hadoop-1. (Zhijie Shen via acmurthy) + + MAPREDUCE-5194. Heed interrupts during Fetcher shutdown. (cdouglas) + + MAPREDUCE-5326. Added version to shuffle header. (Zhijie Shen via + acmurthy) + + MAPREDUCE-5333. Add test that verifies MRAM works correctly when sending + requests with non-normalized capabilities. (ywskycn via tucu) + + MAPREDUCE-5398. MR changes for YARN-513 (Jian He via bikas) + + OPTIMIZATIONS + + MAPREDUCE-4974. Optimising the LineRecordReader initialize() method + (Gelesh via bobby) + + MAPREDUCE-5268. Improve history server startup performance (Karthik + Kambatla via jlowe) + + MAPREDUCE-5352. Optimize node local splits generated by + CombineFileInputFormat. (sseth) + + BUG FIXES + + MAPREDUCE-4671. AM does not tell the RM about container requests which are + no longer needed. (Bikas Saha via sseth) + + MAPREDUCE-4994. -jt generic command line option does not work. (sandyr via tucu) + + MAPREDUCE-5000. Fixes getCounters when speculating by fixing the selection + of the best attempt for a task. (Jason Lowe via sseth) + + MAPREDUCE-4994. Addendum fixing testcases failures. (sandyr via tucu) + + MAPREDUCE-4846. Some JobQueueInfo methods are public in MR1 but protected + in MR2. (Sandy Ryza via tomwhite) + + MAPREDUCE-5013. mapred.JobStatus compatibility: MR2 missing constructors + from MR1. (Sandy Ryza via tomwhite) + + MAPREDUCE-4951. Container preemption interpreted as task failure. + (Sandy Ryza via tomwhite) + + MAPREDUCE-5008. Merger progress miscounts with respect to EOF_MARKER. 
+ (Sandy Ryza via tomwhite) + + MAPREDUCE-4693. History server should include counters for failed tasks. + (Xuan Gong via sseth) + + MAPREDUCE-4896. mapred queue -info spits out ugly exception when queue does + not exist. (sandyr via tucu) + + MAPREDUCE-3685. Fix bugs in MergeManager to ensure compression codec is + appropriately used and that on-disk segments are correctly sorted on + file-size. (Anty Rao and Ravi Prakash via acmurthy) + + MAPREDUCE-4571. TestHsWebServicesJobs fails on jdk7. (tgraves via tucu) + + MAPREDUCE-4716. TestHsWebServicesJobsQuery.testJobsQueryStateInvalid + fails with jdk7. (tgraves via tucu) + + MAPREDUCE-5075. DistCp leaks input file handles since ThrottledInputStream + does not close the wrapped InputStream. (Chris Nauroth via szetszwo) + + MAPREDUCE-3872. Fix an event handling races in ContainerLauncherImpl. + (Robert Kanter via sseth) + + MAPREDUCE-5062. Fix MR AM to read max-retries from the RM. (Zhijie Shen via + vinodkv) + + MAPREDUCE-3829. [Gridmix] Gridmix should give better error message when + input data directory already exists and -generate opton is + given.(ravigummadi via tgraves) + + MAPREDUCE-2722. [Gridmix] Gridmix simulated job's map's hdfsBytesRead + counter is wrong when compressed input is used.(ravigummadi via tgraves) + + MAPREDUCE-3953. [Gridmix] Gridmix throws NPE and does not simulate a + job if the trace contains null taskStatus for a task. (ravigummadi via + tgraves) + + MAPREDUCE-4087. [Gridmix] GenerateDistCacheData job of Gridmix can + become slow in some cases (ravigummadi via tgraves). + + MAPREDUCE-5077. Remove mapreduce.util.ResourceCalculatorPlugin and related + code. (Karthik Kambatla via sseth) + + MAPREDUCE-4083. [Gridmix] NPE in cpu emulation. (amarrk via tgraves) + + MAPREDUCE-4100. [Gridmix] Bug fixed in compression emulation feature for + map only jobs. (amarrk via tgraves) + + MAPREDUCE-4356. [Rumen] Provide access to the method + ParsedTask.obtainTaskAttempts(). (ravigummadi via tgraves) + + MAPREDUCE-4149. [Rumen] Rumen fails to parse certain counter + strings. (ravigummadi via tgraves) + + MAPREDUCE-3757. [Rumen] Fixed Rumen Folder to adjust shuffleFinished and + sortFinished times when needed. (Ravi Gummadi via tgraves) + + MAPREDUCE-5138. Fix LocalDistributedCacheManager after YARN-112. (Omkar Vinit + Joshi via vinodkv) + + MAPREDUCE-5086. MR app master deletes staging dir when sent a reboot + command from the RM. (Jian He via jlowe) + + MAPREDUCE-5113. Streaming input/output types are ignored with java + mapper/reducer. (sandyr via tucu) + + MAPREDUCE-5098. Fix findbugs warnings in gridmix. (kkambatl via tucu) + + MAPREDUCE-5137. AM web UI: clicking on Map Task results in 500 error + (Thomas Graves via jlowe) + + MAPREDUCE-5136. TestJobImpl->testJobNoTasks fails with IBM JAVA (Amir + Sanjar via jlowe) + + MAPREDUCE-5139. Update MR AM to use the modified startContainer API after + YARN-486. (Xuan Gong via vinodkv) + + MAPREDUCE-5151. Update MR AM to use standard exit codes from the API after + YARN-444. (Sandy Ryza via vinodkv) + + MAPREDUCE-5140. MR part of YARN-514 (Zhijie Shen via bikas) + + MAPREDUCE-5128. mapred-default.xml is missing a bunch of history server + configs. (sandyr via tucu) + + MAPREDUCE-4898. FileOutputFormat.checkOutputSpecs and + FileOutputFormat.setOutputPath incompatible with MR1. (rkanter via tucu) + + MAPREDUCE-5078. TestMRAppMaster fails on Windows due to mismatched path + separators. (Chris Nauroth via sseth) + + MAPREDUCE-4932. 
mapreduce.job#getTaskCompletionEvents incompatible with + Hadoop 1. (rkanter via tucu) + + MAPREDUCE-5163. Update MR App to not use API utility methods for collections + after YARN-441. (Xuan Gong via vinodkv) + + MAPREDUCE-5066. Added a timeout for the job.end.notification.url. (Ivan + Mitic via acmurthy) + + MAPREDUCE-5146. application classloader may be used too early to load + classes. (Sangjin Lee via tomwhite) + + MAPREDUCE-4737. Ensure that mapreduce APIs are semantically consistent + with mapred API w.r.t Mapper.cleanup and Reducer.cleanup; in the sense that + cleanup is now called even if there is an error. The old mapred API + already ensures that Mapper.close and Reducer.close are invoked during + error handling. Note that it is an incompatible change, however end-users + can override Mapper.run and Reducer.run to get the old (inconsistent) + behaviour. (acmurthy) + + MAPREDUCE-5166. Fix ConcurrentModificationException due to insufficient + synchronization on updates to task Counters. (Sandy Ryza via acmurthy) + + MAPREDUCE-5181. RMCommunicator should not use AMToken from the env. + (Vinod Kumar Vavilapalli via sseth) + + MAPREDUCE-5178. Update MR App to set progress in ApplicationReport after + YARN-577. (Hitesh Shah via vinodkv) + + MAPREDUCE-5167. Update MR App after YARN-562 to use the new builder API + for the container. (Jian He via vinodkv) + + MAPREDUCE-5179. Fix unit test in TestHSWebServices which fails when + versionInfo has parantheses like when running on a git checkout. (Hitesh Shah + via vinodkv) + + MAPREDUCE-5193. A few MR tests use block sizes which are smaller than the + default minimum block size. (Andrew Wang via atm) + + MAPREDUCE-5205. Fixed MR App to load tokens correctly. (vinodkv) + + MAPREDUCE-5204. Handling YarnRemoteException separately from IOException in + MR app after YARN-629. (Xuan Gong via vinodkv) + + MAPREDUCE-5209. Fix units in a ShuffleScheduler log message. + (Tsuyoshi OZAWA via cdouglas) + + MAPREDUCE-5212. Handling YarnRemoteException separately from IOException in + MR App's use of ClientRMProtocol after YARN-631. (Xuan Gong via vinodkv) + + MAPREDUCE-5226. Handling YarnRemoteException separately from IOException in + MR App's use of AMRMProtocol after YARN-630. (Xuan Gong via vinodkv) + + MAPREDUCE-4942. mapreduce.Job has a bunch of methods that throw + InterruptedException so its incompatible with MR1. (rkanter via tucu) + + MAPREDUCE-5239. Updated MR App to reflect YarnRemoteException changes after + YARN-634. (Siddharth Seth via vinodkv) + + MAPREDUCE-5208. Modified ShuffleHandler to use SecureIOUtils for reading + local files. (Omkar Vinit Joshi via vinodkv) + + MAPREDUCE-5220. Setter methods in TaskCompletionEvent are public in MR1 and + protected in MR2. (sandyr via tucu) + + MAPREDUCE-5240. Fix a bug in MRAppMaster because of which OutputCommitter + could not access credentials set by the user. (vinodkv) + + MAPREDUCE-5244. Two functions changed their visibility in JobStatus. + (zjshen via tucu) + + MAPREDUCE-4927. Historyserver 500 error due to NPE when accessing specific + counters page for failed job. (Ashwin Shankar via jlowe) + + MAPREDUCE-5257. Fix issues in TestContainerLauncherImpl after YARN-617. + (Omkar Vinit Joshi via vinodkv) + + MAPREDUCE-5282. Updating MR App to use immutable ApplicationID after + YARN-716. (Siddharth Seth via vinodkv) + + MAPREDUCE-5286. Change MapReduce to use ContainerTokenIdentifier instead + of the entire Container in the startContainer call - YARN-684. 
+ (Vinod Kumar Vavilapalli via sseth) + + MAPREDUCE-5299. Fix backward incompatibility for TaskCompletionEvent by + adding back setTaskID. (Zhijie Shen via acmurthy) + + MAPREDUCE-5296. Fix backward incompatibility for JobControl by adding the + omitted addJob. (Zhijie Shen via acmurthy) + + MAPREDUCE-5245. Added back constants to JobConf to fix incompatibilities. + (Zhijie Shen via acmurthy) + + MAPREDUCE-5297. Updated MR App since BuilderUtils is no longer public + after YARN-748. (Jian He via vinodkv) + + MAPREDUCE-5301. Updated MR code to work with YARN-635 changes of renaming + YarnRemoteException to YarnException. (Siddharth Seth via vinodkv) + + MAPREDUCE-5308. Shuffling to memory can get out-of-sync when fetching + multiple compressed map outputs (Nathan Roberts via jlowe) + + MAPREDUCE-5315. DistCp reports success even on failure. (mithun and jlowe + via daryn) + + MAPREDUCE-5259. TestTaskLog fails on Windows because of path separators + missmatch. (Ivan Mitic via cnauroth) + + MAPREDUCE-4019. -list-attempt-ids is not working (Ashwin Shankar, + Devaraj K, and B Anil Kumar via jlowe) + + MAPREDUCE-5334. Fix failing unit tests - TestContainerLauncher, + TestContainerLauncherImpl. (Vinod Kumar Vavilapalli via sseth) + + MAPREDUCE-5325. MR changes related to YARN-727. ClientRMProtocol.getAllApplications + should accept ApplicationType as a parameter. (Xuan Gong via hitesh) + + MAPREDUCE-5291. Change MR App to use updated property names in + container-log4j.properties. (Zhijie Shen via sseth) + + MAPREDUCE-5303. Changed MR app after moving ProtoBase to package impl.pb via + YARN-724. (Jian He via vinodkv) + + MAPREDUCE-5312. TestRMNMInfo is failing. (sandyr via tucu) + + MAPREDUCE-5304. mapreduce.Job killTask/failTask/getTaskCompletionEvents + methods have incompatible signature changes. (kkambatl via tucu) + + MAPREDUCE-5298. Moved MapReduce services to YARN-530 stricter lifecycle. + (Steve Loughran via vinodkv) + + MAPREDUCE-5319. Set user.name in job.xml. (Xuan Gong via acmurthy) + + MAPREDUCE-5310. MRAM should not normalize allocation request capabilities. + (tucu) + + MAPREDUCE-5213. Re-assess TokenCache methods marked @Private. + (kkambatl via tucu) + + MAPREDUCE-5412. Update MR app to use multiple containers API of + ContainerManager after YARN-926. (Jian He via vinodkv) + + MAPREDUCE-5421. Fixed TestNonExistentJob failure after YARN-873. (Junping Du + via vinodkv) + + MAPREDUCE-5419. TestSlive is getting FileNotFound Exception (Robert Parker + via jlowe) + + MAPREDUCE-5399. Unnecessary Configuration instantiation in IFileInputStream + slows down merge. (Stanislav Barton via Sandy Ryza) + + BREAKDOWN OF HADOOP-8562 SUBTASKS + + MAPREDUCE-4739. Some MapReduce tests fail to find winutils. + (Chris Nauroth via suresh) + + MAPREDUCE-4780. MapReduce distribution build fails on Windows. + (Chris Nauroth via suresh) + + MAPREDUCE-4790. MapReduce build script would be more readable using abspath. + (Chris Nauroth via suresh) + + MAPREDUCE-4869. Fix TestMapReduceChildJVM. (Chris Nauroth via acmurthy) + + MAPREDUCE-4870. Fix TestMRJobsWithHistoryService. (Chris Nauroth via acmurthy) + + MAPREDUCE-4983. Fixed various platform specific assumptions in various tests, + so that they can pass on Windows too. (Chris Nauroth via vinodkv) + + HADOOP-9372. Fix bad timeout annotations on tests. + (Arpit Agarwal via suresh) + + MAPREDUCE-4885. Streaming tests have multiple failures on Windows. (Chris + Nauroth via bikas) + + MAPREDUCE-5177. 
Use common utils FileUtil#setReadable/Writable/Executable & + FileUtil#canRead/Write/Execute. (Ivan Mitic via suresh) + + MAPREDUCE-5349. TestClusterMapReduceTestCase and TestJobName fail on Windows + in branch-2. (Chuan Liu via cnauroth) + + MAPREDUCE-5355. MiniMRYarnCluster with localFs does not work on Windows. + (Chuan Liu via cnauroth) + + MAPREDUCE-5359. JobHistory should not use File.separator to match timestamp + in path. (Chuan Liu via cnauroth) + + MAPREDUCE-5357. Job staging directory owner checking could fail on Windows. + (Chuan Liu via cnauroth) + + MAPREDUCE-5360. TestMRJobClient fails on Windows due to path format. + (Chuan Liu via cnauroth) + + MAPREDUCE-5366. TestMRAsyncDiskService fails on Windows. (Chuan Liu via + cnauroth) + + MAPREDUCE-5187. Create mapreduce command scripts on Windows. (Chuan Liu via + cnauroth) + + MAPREDUCE-4374. Fix child task environment variable config and add support + for Windows. (Chuan Liu via cnauroth) + +Release 2.0.5-alpha - 06/06/2013 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + MAPREDUCE-5240 inside of FileOutputCommitter the initialized Credentials cache + appears to be empty. (vinodkv) + +Release 2.0.4-alpha - 2013-04-25 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + MAPREDUCE-5006. Fix failing streaming tests due to MAPREDUCE-4994. + (Sandy Ryza via tomwhite) + + MAPREDUCE-5088. MR Client gets an renewer token exception while Oozie is + submitting a job (Daryn Sharp via cos) + + MAPREDUCE-5117. Changed MRClientProtocolPBClientImpl to be closeable and thus + fix failures in renewal of HistoryServer's delegations tokens. (Siddharth + Seth via vinodkv) + + MAPREDUCE-5083. MiniMRCluster should use a random component when creating an + actual cluster (Siddharth Seth via hitesh) + + MAPREDUCE-5094. Disabled memory monitoring by default in MiniMRYarnCluster + to avoid some downstream tests failing. (Siddharth Seth via vinodkv) + +Release 2.0.3-alpha - 2013-02-06 + + INCOMPATIBLE CHANGES + + MAPREDUCE-4123. Remove the 'mapred groups' command, which is no longer + supported. (Devaraj K via sseth) + + MAPREDUCE-4938. Use token request messages defined in hadoop common. + (suresh) + + NEW FEATURES + + MAPREDUCE-4520. Added support for MapReduce applications to request for + CPU cores along-with memory post YARN-2. (acmurthy) + + MAPREDUCE-4810. Added new admin command options for MR AM. (Jerry Chen via + vinodkv) + + MAPREDUCE-4049. Experimental api to allow for alternate shuffle plugins. + (Avner BenHanoch via acmurthy) + + MAPREDUCE-4807. Allow MapOutputBuffer to be pluggable. (masokan via tucu) + + MAPREDUCE-4808. Refactor MapOutput and MergeManager to facilitate reuse + by Shuffle implementations. (masokan via tucu) + + IMPROVEMENTS + + MAPREDUCE-3678. The Map tasks logs should have the value of input + split it processed. (harsh) + + MAPREDUCE-4616. Improve javadoc for MultipleOutputs. (Tony Burton via + acmurthy) + + HADOOP-8911. CRLF characters in source and text files. + (Raja Aluri via suresh) + + MAPREDUCE-4703. Add the ability to start the MiniMRClientCluster using + the configurations used before it is being stopped. (ahmed.radwan via tucu) + + MAPREDUCE-4845. ClusterStatus.getMaxMemory() and getUsedMemory() exist in + MR1 but not MR2. (Sandy Ryza via tomwhite) + + MAPREDUCE-4899. 
Implemented a MR specific plugin for tracking finished + applications that YARN's ResourceManager doesn't keep track off anymore + (Derek Dagit via vinodkv) + + MAPREDUCE-4920. Use security token protobuf definition from hadoop common. + (Suresh Srinivas via vinodkv) + + MAPREDUCE-4907. TrackerDistributedCacheManager issues too many getFileStatus + calls. (sandyr via tucu) + + MAPREDUCE-4949. Enable multiple pi jobs to run in parallel. (sandyr via tucu) + + MAPREDUCE-4809. Change visibility of classes for pluggable sort changes. + (masokan via tucu) + + MAPREDUCE-4838. Add additional fields like Locality, Avataar to the + JobHistory logs. (Zhijie Shen via sseth) + + MAPREDUCE-4971. Minor extensibility enhancements to Counters & + FileOutputFormat. (Arun C Murthy via sseth) + + MAPREDUCE-4977. Documentation for pluggable shuffle and pluggable sort. + (tucu) + + OPTIMIZATIONS + + MAPREDUCE-4893. Fixed MR ApplicationMaster to do optimal assignment of + containers to get maximum locality. (Bikas Saha via vinodkv) + + BUG FIXES + + MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant. + (Yu Gao via llu) + + MAPREDUCE-4607. Race condition in ReduceTask completion can result in Task + being incorrectly failed. (Bikas Saha via tomwhite) + + MAPREDUCE-4646. Fixed MR framework to send diagnostic information correctly + to clients in case of failed jobs also. (Jason Lowe via vinodkv) + + MAPREDUCE-4674. Hadoop examples secondarysort has a typo + "secondarysrot" in the usage. (Robert Justice via eli) + + MAPREDUCE-4681. Fix unit tests broken by HDFS-3910. (acmurthy) + + MAPREDUCE-4712. mr-jobhistory-daemon.sh doesn't accept --config + (Vinod Kumar Vavilapalli via tgraves) + + MAPREDUCE-4654. TestDistCp is ignored. (Sandy Ryza via tomwhite) + + MAPREDUCE-4637. Handle TaskAttempt diagnostic updates while in the NEW and + UNASSIGNED states. (Mayank Bansal via sseth) + + MAPREDUCE-1806. CombineFileInputFormat does not work with paths not on default FS. (Gera Shegalov via tucu) + + MAPREDUCE-4777. In TestIFile, testIFileReaderWithCodec relies on + testIFileWriterWithCodec. (Sandy Ryza via tomwhite) + + MAPREDUCE-4800. Cleanup o.a.h.mapred.MapTaskStatus - remove unused + code. (kkambatl via tucu) + + MAPREDUCE-4861. Cleanup: Remove unused mapreduce.security.token.DelegationTokenRenewal. + (kkambatl via tucu) + + MAPREDUCE-4856. TestJobOutputCommitter uses same directory as + TestJobCleanup. (Sandy Ryza via tomwhite) + + MAPREDUCE-4895. Fix compilation failure of org.apache.hadoop.mapred. + gridmix.TestResourceUsageEmulators (Dennis Y via tgraves) + + MAPREDUCE-4278. Cannot run two local jobs in parallel from the same + gateway. (Sandy Ryza via tomwhite) + + MAPREDUCE-1700. User supplied dependencies may conflict with MapReduce + system JARs. (tomwhite) + + MAPREDUCE-4936. JobImpl uber checks for cpu are wrong (Arun C Murthy via + jlowe) + + MAPREDUCE-4924. flakey test: org.apache.hadoop.mapred.TestClusterMRNotification.testMR. + (rkanter via tucu) + + MAPREDUCE-4923. Add toString method to TaggedInputSplit. (sandyr via tucu) + + MAPREDUCE-4948. Fix a failing unit test TestYARNRunner.testHistoryServerToken. + (Junping Du via sseth) + + MAPREDUCE-4803. Remove duplicate copy of TestIndexCache. (Mariappan Asokan + via sseth) + + MAPREDUCE-2264. Job status exceeds 100% in some cases. + (devaraj.k and sandyr via tucu) + + MAPREDUCE-4969. TestKeyValueTextInputFormat test fails with Open JDK 7. + (Arpit Agarwal via suresh) + + MAPREDUCE-4884. 
Streaming tests fail to start MiniMRCluster due to missing + queue configuration. (Chris Nauroth via suresh) + + MAPREDUCE-4953. HadoopPipes misuses fprintf. (Andy Isaacson via atm) + +Release 2.0.2-alpha - 2012-09-07 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + MAPREDUCE-987. Exposing MiniDFS and MiniMR clusters as a single process + command-line. (ahmed via tucu) + + MAPREDUCE-4417. add support for encrypted shuffle (tucu) + + MAPREDUCE-4355. Add RunningJob.getJobStatus() (kkambatl via tucu) + + MAPREDUCE-3451. Port Fair Scheduler to MR2 (pwendell via tucu) + + MAPREDUCE-4438. Add a simple, generic client to run 'easy' AMs in YARN. + (Bikas Saha via acmurthy) + + IMPROVEMENTS + + MAPREDUCE-4157. ResourceManager should not kill apps that are well behaved + (Jason Lowe via bobby) + + MAPREDUCE-4511. Add IFile readahead (ahmed via tucu) + + MAPREDUCE-4408. allow jobs to set a JAR that is in the distributed cached + (rkanter via tucu) + + MAPREDUCE-4440. Changed SchedulerApp and SchedulerNode to be a minimal + interface to allow schedulers to maintain their own. (acmurthy) + + MAPREDUCE-4146. Support limits on task status string length and number of + block locations in branch-2. (Ahmed Radwan via tomwhite) + + MAPREDUCE-3871. Allow symlinking in LocalJobRunner DistributedCache. + (tomwhite) + + MAPREDUCE-3921. MR AM should act on node health status changes. + (Bikas Saha via sseth) + + MAPREDUCE-4355. Add RunningJob.getJobStatus() (kkambatl via tucu) + + MAPREDUCE-4427. Added an 'unmanaged' mode for AMs so as to ease + development of new applications. (Bikas Saha via acmurthy) + + MAPREDUCE-3289. Make use of fadvise in the NM's shuffle handler. + (Todd Lipcon and Siddharth Seth via sseth) + + MAPREDUCE-4580. Change MapReduce to use the yarn-client module. + (Vinod Kumar Vavilapalli via sseth) + + MAPREDUCE-4579. Split TestTaskAttempt into two so as to pass tests on + jdk7. (Thomas Graves via vinodkv) + + MAPREDUCE-4638. MR AM supplies MapReduce jars in classpath rather than + rely on YARN. (acmurthy) + + BUG FIXES + + MAPREDUCE-4422. YARN_APPLICATION_CLASSPATH needs a documented default value in + YarnConfiguration. (ahmed via tucu) + + MAPREDUCE-4406. Users should be able to specify the MiniCluster ResourceManager + and JobHistoryServer ports. (ahmed via tucu) + + MAPREDUCE-4407. Add hadoop-yarn-server-tests--tests.jar to hadoop dist + package. (ahmed via tucu) + + MAPREDUCE-4465. Update description of yarn.nodemanager.address property. + (bowang via tucu) + + MAPREDUCE-4342. Distributed Cache gives inconsistent result if cache files + get deleted from tasktracker. (mayank_bansal via tucu) + + MAPREDUCE-4498. Remove hsqldb jar from Hadoop runtime classpath. (rkanter via tucu) + + MAPREDUCE-4494. TestFifoScheduler failing with Metrics source QueueMetrics,q0=default + already exists!. (ahmed.radwan via tucu) + + MAPREDUCE-4484. Incorrect IS_MINI_YARN_CLUSTER property name in YarnConfiguration. + (ahmed.radwan via tucu) + + MAPREDUCE-4562. Support for "FileSystemCounter" legacy counter group name + for compatibility reasons is creating incorrect counter name. + (Jarek Jarcec Cecho via tomwhite) + + MAPREDUCE-4068. Jars in lib subdirectory of the submittable JAR are not added to the + classpath (rkanter via tucu) + + MAPREDUCE-4577. HDFS-3672 broke + TestCombineFileInputFormat.testMissingBlocks() test (atm) + + MAPREDUCE-4470. Fix TestCombineFileInputFormat.testForEmptyFile (ikatsov via tucu) + + MAPREDUCE-4608. hadoop-mapreduce-client is missing some dependencies. 
+ (tucu via tomwhite) + + MAPREDUCE-4610. Support deprecated mapreduce.job.counters.limit property in + MR2. (tomwhite) + + MAPREDUCE-4629. Remove JobHistory.DEBUG_MODE (Karthik Kambatla via bobby) + + MAPREDUCE-4642. MiniMRClientClusterFactory should not use job.setJar() (rkanter via tucu) + + MAPREDUCE-4148. MapReduce should not have a compile-time dependency on + HDFS. (tomwhite) + + MAPREDUCE-4250. hadoop-config.sh missing variable exports, causes Yarn + jobs to fail with ClassNotFoundException MRAppMaster. (phunt via tucu) + + MAPREDUCE-4002. MultiFileWordCount job fails if the input path is not + from default file system. (Bhallamudi Venkata Siva Kamesh via todd) + + MAPREDUCE-4274 MapOutputBuffer should use native byte order for kvmeta. + (todd via bobby) + + MAPREDUCE-4262. NM gives wrong log message saying "Connected to + ResourceManager" before trying to connect. (Devaraj K via tgraves) + + MAPREDUCE-4276. Allow setting yarn.nodemanager.delete.debug-delay-sec + property to "-1" for easier container debugging. (ahmed via tucu) + + MAPREDUCE-4224. TestFifoScheduler throws + org.apache.hadoop.metrics2.MetricsException (Devaraj K via tgraves) + + MAPREDUCE-3493. Add the default mapreduce.shuffle.port property + to mapred-default.xml (Madhukara Phatak via harsh) + + MAPREDUCE-4307. TeraInputFormat calls FileSystem.getDefaultBlockSize() + without a Path - Failure when using ViewFileSystem. (Ahmed Radwan via eli) + + MAPREDUCE-4313. TestTokenCache doesn't compile due + TokenCache.getDelegationToken compilation error (bobby) + + MAPREDUCE-3873. Fixed NodeManagers' decommissioning at RM to accept IP + addresses also. (xieguiming via vinodkv) + + MAPREDUCE-4306. Fix distributed shell to work with users other than the one + running the daemons. (Ahmed Radwan via sseth) + + MAPREDUCE-4031. Prevent a Node Manager hang during shutdown. + (Devaraj K via sseth) + + MAPREDUCE-4336. Distributed Shell fails when used with the CapacityScheduler + (ahmed via tucu) + + MAPREDUCE-4290. Fix Yarn Applicaiton Status to MR JobState conversion. + (Devaraj K via sseth) + + MAPREDUCE-2289. Permissions race can make getStagingDir fail on local filesystem + (ahmed via tucu) + + MAPREDUCE-4372. Deadlock in Resource Manager (Devaraj K via bobby) + + MAPREDUCE-4376. TestClusterMRNotification times out (Kihwal Lee via bobby) + + MAPREDUCE-4383. HadoopPipes.cc needs to include unistd.h. + (Andy Isaacson via eli) + + MAPREDUCE-2739. Update installation docs (remove YarnClientFactory) (bowang via tucu) + + MAPREDUCE-3993. Graceful handling of codec errors during decompression + (kkambatl via tucu) + + MAPREDUCE-4416. Some tests fail if Clover is enabled (Kihwal Lee via bobby) + + MAPREDUCE-4441. Fix build issue caused by MR-3451 (kkambatl via tucu) + + HADOOP-8499. Lower min.user.id to 500 for the tests. + (Colin Patrick McCabe via eli) + + MAPREDUCE-4395. Possible NPE at ClientDistributedCacheManager + #determineTimestamps (Bhallamudi via bobby) + + MAPREDUCE-4380. Empty Userlogs directory is getting created under logs + directory (Devaraj K via bobby) + + MAPREDUCE-4649. Ensure MapReduce JobHistory Daemon doens't assume + HADOOP_YARN_HOME and HADOOP_MAPRED_HOME are the same. (vinodkv via + acmurthy) + +Release 2.0.0-alpha - 05-23-2012 + + INCOMPATIBLE CHANGES + + MAPREDUCE-3545. Remove Avro RPC. (suresh) + + NEW FEATURES + + IMPROVEMENTS + + MAPREDUCE-2887. Due to HADOOP-7524, change RPC to allow multiple protocols + including multuple versions of the same protocol (Sanjay Radia) + + MAPREDUCE-2934. 
MR portion of HADOOP-7607 - Simplify the RPC proxy cleanup + process (atm) + + HADOOP-7862. MR changes to work with HADOOP 7862: Move the support for + multiple protocols to lower layer so that Writable, PB and Avro can all + use it (Sanjay Radia) + + MAPREDUCE-3909 Javadoc the Service interfaces (stevel) + + MAPREDUCE-3885. Avoid an unnecessary copy for all requests/responses in + MRs ProtoOverHadoopRpcEngine. (Devaraj Das via sseth) + + MAPREDUCE-3935. Annotate Counters.Counter and Counters.Group as @Public. + (tomwhite) + + MAPREDUCE-3991. Streaming FAQ has some wrong instructions about input files + splitting. (harsh) + + MAPREDUCE-3773. Add queue metrics with buckets for job run times. (omalley + via acmurthy) + + MAPREDUCE-3970. Add ServiceOperations class to aid working with Services + (stevel) + + MAPREDUCE-3353. Add a channel between RM and AM to get information on + nodes. (Bikas Saha via acmurthy) + + MAPREDUCE-3955. Change MR to use ProtobufRpcEngine from hadoop-common + instead of ProtoOverHadoopRpcEngine. (Jitendra Nath Pandey via sseth) + + MAPREDUCE-4103. Fix HA docs for changes to shell command fencer args (todd) + + MAPREDUCE-4093. Improve RM WebApp start up when proxy address is not set + (Devaraj K vai bobby) + + MAPREDUCE-4138. Reduce memory usage of counters due to non-static nested + classes. (tomwhite) + + MAPREDUCE-3883. Document yarn.nodemanager.delete.debug-delay-sec + configuration property (Eugene Koontz via tgraves) + + MAPREDUCE-4219. make default container-executor.conf.dir be a path + relative to the container-executor binary. (rvs via tucu) + + MAPREDUCE-4205. retrofit all JVM shutdown hooks to use ShutdownHookManager + (tucu) + + HADOOP-8285 MR changes for Use ProtoBuf for RpcPayLoadHeader (sanjay radia) + + MAPREDUCE-2220. Fix new API FileOutputFormat-related typos in + mapred-default.xml (Rui Kubo via harsh) + + MAPREDUCE-3907. Document entries mapred-default.xml for the + jobhistory server. (Eugene Koontz via harsh) + + MAPREDUCE-3906. Fix inconsistency in documentation regarding + mapreduce.jobhistory.principal. (Eugene Koontz via harsh) + + MAPREDUCE-4432. Confusing warning message when GenericOptionsParser + is not used. (Gabriel Reid via harsh) + + OPTIMIZATIONS + + BUG FIXES + + MAPREDUCE-3740. Fixed broken mapreduce compilation after the patch for + HADOOP-7965. (Devaraj K via vinodkv) + + MAPREDUCE-3818. Fixed broken compilation in TestSubmitJob after the patch + for HDFS-2895. (Suresh Srinivas via vinodkv) + + MAPREDUCE-2942. TestNMAuditLogger.testNMAuditLoggerWithIP failing (Thomas + Graves via mahadev) + + MAPREDUCE-3933. Failures because MALLOC_ARENA_MAX is not set (ahmed via tucu) + + MAPREDUCE-3728. ShuffleHandler can't access results when configured in a + secure mode (ahmed via tucu) + + MAPREDUCE-3952. In MR2, when Total input paths to process == 1, + CombinefileInputFormat.getSplits() returns 0 split. (zhenxiao via tucu) + + MAPREDUCE-3578. Starting nodemanager as root gives "Unknown -jvm option" + (tomwhite) + + MAPREDUCE-3348. Fixed a bug in MR client to redirect to JobHistoryServer + correctly when RM forgets the app. (Devaraj K via vinodkv) + + MAPREDUCE-3974. TestSubmitJob in MR1 tests doesn't compile after HDFS-162 + merge. (atm) + + MAPREDUCE-4007. JobClient getJob(JobID) should return NULL if the job + does not exist (for backwards compatibility) (tucu) + + MAPREDUCE-3431 NPE in Resource Manager shutdown. (stevel) + + MAPREDUCE-4010. TestWritableJobConf fails on trunk (tucu via bobby) + + MAPREDUCE-3992. 
Reduce fetcher doesn't verify HTTP status code of response + (todd) + + MAPREDUCE-4066. Use default value when fetching MR_AM_STAGING_DIR + (xieguiming via harsh) + + MAPREDUCE-3377. Added a unit test to ensure OutputCommitter.checkOutputSpecs + is called prior to copying job.xml. (Jane Chen via acmurthy) + + MAPREDUCE-4081. TestMROutputFormat.java does not compile (Jason Lowe via + bobby) + + MAPREDUCE-4082. hadoop-mapreduce-client-app's mrapp-generated-classpath + file should not be in the module JAR (tucu) + + MAPREDUCE-3916. various issues with running yarn proxyserver (devaraj via tucu) + + MAPREDUCE-4091. tools testcases failing because of MAPREDUCE-4082 (tucu) + + MAPREDUCE-4098. TestMRApps testSetClasspath fails (tucu) + + MAPREDUCE-4097. tools testcases fail because missing mrapp-generated-classpath + file in classpath (rvs via tucu) + + MAPREDUCE-4113. Fix tests org.apache.hadoop.mapred.TestClusterMRNotification + (Devaraj K via bobby) + + MAPREDUCE-4112. Fix tests org.apache.hadoop.mapred.TestClusterMapReduceTestCase + (Devaraj K via bobby) + + MAPREDUCE-4111. Fix tests in org.apache.hadoop.mapred.TestJobName (Devaraj + K via bobby) + + MAPREDUCE-4110. Fix tests in org.apache.hadoop.mapred.TestMiniMRClasspath & + org.apache.hadoop.mapred.TestMiniMRWithDFSWithDistinctUsers (Devaraj K via + bobby) + + MAPREDUCE-4105. Yarn RackResolver ignores rack configurations. + (Ahmed Radwan via tomwhite) + + MAPREDUCE-3869. Fix classpath for DistributedShell application. (Devaraj K + via sseth) + + MAPREDUCE-4057. Update RAID for the HA and fsdataset changes. (Devaraj K + via szetszwo) + + MAPREDUCE-4076. Stream job fails with ZipException when use yarn jar + command (Devaraj K via bobby) + + MAPREDUCE-4108. Fix tests in org.apache.hadoop.util.TestRunJar + (Devaraj K via tgraves) + + MAPREDUCE-4107. Fix tests in org.apache.hadoop.ipc.TestSocketFactory + (Devaraj K via tgraves) + + MAPREDUCE-4147. YARN should not have a compile-time dependency on HDFS. + (tomwhite) + + MAPREDUCE-4008. ResourceManager throws MetricsException on start up + saying QueueMetrics MBean already exists (Devaraj K via tgraves) + + MAPREDUCE-3867. MiniMRYarn/MiniYarn uses fixed ports (tucu) + + MAPREDUCE-4141. clover integration broken, also mapreduce poms are + pulling in clover as a dependency. (phunt via tucu) + + MAPREDUCE-4193. broken doc link for yarn-default.xml in site.xml. + (phunt via tomwhite) + + MAPREDUCE-4202. TestYarnClientProtocolProvider is broken (Daryn Sharp via + bobby) + + MAPREDUCE-3173. MRV2 UI doesn't work properly without internet (Devaraj K + via bobby) + + MAPREDUCE-3958. RM: Remove RMNodeState and replace it with NodeState + (Bikas Saha via bobby) + + MAPREDUCE-4231. Update RAID to use the new BlockCollection interface. + (szetszwo) + + MAPREDUCE-4483. 2.0 build does not work (John George via bobby) + + MAPREDUCE-4444. nodemanager fails to start when one of the local-dirs is + bad (Jason Lowe via bobby) + +Release 0.23.9 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + MAPREDUCE-5268. Improve history server startup performance (Karthik + Kambatla via jlowe) + + BUG FIXES + + MAPREDUCE-5308. Shuffling to memory can get out-of-sync when fetching + multiple compressed map outputs (Nathan Roberts via jlowe) + + MAPREDUCE-5315. DistCp reports success even on failure. (mithun and jlowe + via daryn) + + MAPREDUCE-4019. 
-list-attempt-ids is not working (Ashwin Shankar, + Devaraj K, and B Anil Kumar via jlowe) + +Release 0.23.8 - 2013-06-05 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + MAPREDUCE-5065. DistCp should skip checksum comparisons if block-sizes + are different on source/target (Mithun Radhakrishnan via kihwal) + + OPTIMIZATIONS + + BUG FIXES + + MAPREDUCE-5015. Coverage fix for org.apache.hadoop.mapreduce.tools.CLI + (Aleksey Gorshkov via tgraves) + + MAPREDUCE-5147. Maven build should create + hadoop-mapreduce-client-app-VERSION.jar directly (Robert Parker via tgraves) + + MAPREDUCE-4927. Historyserver 500 error due to NPE when accessing specific + counters page for failed job. (Ashwin Shankar via jlowe) + +Release 0.23.7 - 2013-04-18 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + MAPREDUCE-4905. test org.apache.hadoop.mapred.pipes + (Aleksey Gorshkov via bobby) + + MAPREDUCE-4989. JSONify DataTables input data for Attempts page (Ravi + Prakash via jlowe) + + MAPREDUCE-5027. Shuffle does not limit number of outstanding connections + (Robert Parker via jeagles) + + MAPREDUCE-4972. Coverage fixing for org.apache.hadoop.mapreduce.jobhistory + (Aleksey Gorshkov via bobby) + + OPTIMIZATIONS + + MAPREDUCE-4946. Fix a performance problem for large jobs by reducing the + number of map completion event type conversions. (Jason Lowe via sseth) + + MAPREDUCE-4822. Unnecessary conversions in History Events. (Chu Tong via + jlowe) + + BUG FIXES + + MAPREDUCE-4458. Warn if java.library.path is used for AM or Task + (Robert Parker via jeagles) + + MAPREDUCE-4992. AM hangs in RecoveryService when recovering tasks with + speculative attempts (Robert Parker via jlowe) + + MAPREDUCE-5009. Killing the Task Attempt slated for commit does not clear + the value from the Task commitAttempt member (Robert Parker via jeagles) + + MAPREDUCE-4871. AM uses mapreduce.jobtracker.split.metainfo.maxsize but + mapred-default has mapreduce.job.split.metainfo.maxsize (Jason Lowe via + jeagles) + + MAPREDUCE-4794. DefaultSpeculator generates error messages on normal + shutdown (Jason Lowe via jeagles) + + MAPREDUCE-5043. Fetch failure processing can cause AM event queue to + backup and eventually OOM (Jason Lowe via bobby) + + MAPREDUCE-5023. History Server Web Services missing Job Counters (Ravi + Prakash via tgraves) + + MAPREDUCE-5060. Fetch failures that time out only count against the first + map task (Robert Joseph Evans via jlowe) + + MAPREDUCE-5042. Reducer unable to fetch for a map task that was recovered + (Jason Lowe via bobby) + + MAPREDUCE-5053. java.lang.InternalError from decompression codec cause + reducer to fail (Robert Parker via jeagles) + + MAPREDUCE-4991. coverage for gridmix (Aleksey Gorshkov via tgraves) + + MAPREDUCE-5007. fix coverage org.apache.hadoop.mapreduce.v2.hs (Aleksey + Gorshkov via tgraves) + + MAPREDUCE-5137. AM web UI: clicking on Map Task results in 500 error + (Thomas Graves via jlowe) + +Release 0.23.6 - 2013-02-06 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + MAPREDUCE-4811. JobHistoryServer should show when it was started in WebUI + About page (Ravi Prakash via jlowe) + + OPTIMIZATIONS + + BUG FIXES + + MAPREDUCE-4802. Takes a long time to load the task list on the AM for + large jobs (Ravi Prakash via bobby) + + MAPREDUCE-4764. repair TestBinaryTokenFile (Ivan A. Veselovsky via bobby) + + MAPREDUCE-4825. JobImpl.finished doesn't expect ERROR as a final job state + (jlowe via bobby) + + MAPREDUCE-4817. 
Hardcoded task ping timeout kills tasks localizing large + amounts of data (tgraves) + + MAPREDUCE-4836. Elapsed time for running tasks on AM web UI tasks page is 0 + (Ravi Prakash via jeagles) + + MAPREDUCE-4842. Shuffle race can hang reducer (Mariappan Asokan via jlowe) + + MAPREDUCE-4833. Task can get stuck in FAIL_CONTAINER_CLEANUP (Robert + Parker via jlowe) + + MAPREDUCE-4793. Problem with adding resources when using both -files and + -file to hadoop streaming (jlowe) + + MAPREDUCE-4890. Invalid TaskImpl state transitions when task fails while + speculating (jlowe) + + MAPREDUCE-4902. Fix typo "receievd" should be "received" in log output + (Albert Chu via jlowe) + + MAPREDUCE-4813. AM timing out during job commit (jlowe via bobby) + + MAPREDUCE-4279. getClusterStatus() fails with null pointer exception when + running jobs in local mode (Devaraj K via bobby) + + MAPREDUCE-4832. MR AM can get in a split brain situation (jlowe) + + MAPREDUCE-4894. Renewal / cancellation of JobHistory tokens (Siddharth + Seth via tgraves) + + MAPREDUCE-4819. AM can rerun job after reporting final job status to the + client (bobby and Bikas Saha via bobby) + + MAPREDUCE-4913. TestMRAppMaster#testMRAppMasterMissingStaging occasionally + exits (Jason Lowe via tgraves) + + MAPREDUCE-4848. TaskAttemptContext cast error during AM recovery (Jerry + Chen via jlowe) + + MAPREDUCE-4921. JobClient should acquire HS token with RM principal + (daryn via bobby) + + MAPREDUCE-4934. Maven RAT plugin is not checking all source files (tgraves) + + MAPREDUCE-4678. Running the Pentomino example with defaults throws + java.lang.NegativeArraySizeException (Chris McConnell via harsh) + + MAPREDUCE-4925. The pentomino option parser may be buggy. + (Karthik Kambatla via harsh) + +Release 0.23.5 - 2012-11-28 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + MAPREDUCE-4596. Split StateMachine state from states seen by MRClientProtocol + for Job, Task and TaskAttempt. (Siddarth Seth via vinodkv) + + MAPREDUCE-4752. Reduce MR AM memory usage through String Interning (Robert + Evans via tgraves) + + MAPREDUCE-4266. remove Ant remnants from MR (tgraves via bobby) + + MAPREDUCE-4666. JVM metrics for history server (jlowe via jeagles) + + OPTIMIZATIONS + + MAPREDUCE-4720. Browser thinks History Server main page JS is taking too + long (Ravi Prakash via bobby) + + BUG FIXES + + MAPREDUCE-4554. Job Credentials are not transmitted if security is turned + off (Benoy Antony via bobby) + + MAPREDUCE-4705. Fix a bug in job history lookup, which makes older jobs + inaccessible despite the presence of a valid history file. (Jason Lowe + via sseth) + + MAPREDUCE-4521. mapreduce.user.classpath.first incompatibility with 0.20/1.x + (Ravi Prakash via bobby) + + MAPREDUCE-4721. Task startup time in JHS is same as job startup time. + (Ravi Prakash via bobby) + + MAPREDUCE-4479. Fix parameter order in assertEquals() in + TestCombineInputFileFormat.java (Mariappan Asokan via bobby) + + MAPREDUCE-4733. Reducer can fail to make progress during shuffle if too many + reducers complete consecutively. (Jason Lowe via vinodkv) + + MAPREDUCE-4740. only .jars can be added to the Distributed Cache + classpath. (Robert Joseph Evans via jlowe) + + MAPREDUCE-4229. Intern counter names in the JT (Miomir Boljanovic and bobby via daryn) + + MAPREDUCE-4741. WARN and ERROR messages logged during normal AM shutdown. + (Vinod Kumar Vavilapalli via jlowe) + + MAPREDUCE-4730. 
Fix Reducer's EventFetcher to scale the map-completion + requests slowly to avoid HADOOP-8942. (Jason Lowe via vinodkv) + + MAPREDUCE-4748. Invalid event: T_ATTEMPT_SUCCEEDED at SUCCEEDED. (jlowe) + + MAPREDUCE-4724. job history web ui applications page should be sorted to + display last app first (tgraves via bobby) + + MAPREDUCE-4746. The MR Application Master does not have a config to set + environment variables (Rob Parker via bobby) + + MAPREDUCE-4729. job history UI not showing all job attempts. (Vinod + Kumar Vavilapalli via jlowe) + + MAPREDUCE-4763 repair test TestUmbilicalProtocolWithJobToken (Ivan A. + Veselovsky via bobby) + + MAPREDUCE-4771. KeyFieldBasedPartitioner not partitioning properly when + configured (jlowe via bobby) + + MAPREDUCE-4772. Fetch failures can take way too long for a map to be + restarted (bobby) + + MAPREDUCE-4782. NLineInputFormat skips first line of last InputSplit + (Mark Fuhs via bobby) + + MAPREDUCE-4774. JobImpl does not handle asynchronous task events in FAILED + state (jlowe via bobby) + + MAPREDUCE-4751. AM stuck in KILL_WAIT for days (vinodkv via bobby) + + MAPREDUCE-4787. TestJobMonitorAndPrint is broken (Rob Parker via bobby) + + MAPREDUCE-4425. Speculation + Fetch failures can lead to a hung job (jlowe + via bobby) + + MAPREDUCE-4786. Job End Notification retry interval is 5 milliseconds by + default (Ravi Prakash via bobby) + + MAPREDUCE-4517. Too many INFO messages written out during AM to RM heartbeat + (Jason Lowe via tgraves) + + MAPREDUCE-4797. LocalContainerAllocator can loop forever trying to contact + the RM (jlowe via bobby) + + MAPREDUCE-4801. ShuffleHandler can generate large logs due to prematurely + closed channels (jlowe via bobby) + +Release 0.23.4 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + MAPREDUCE-2786. Add compression option for TestDFSIO. + (Plamen Jeliazkov via shv) + + MAPREDUCE-4645. Provide a random seed to Slive to make the sequence + of file names deterministic. (Ravi Prakash via shv) + + MAPREDUCE-4651. Benchmarking random reads with DFSIO. (shv) + + OPTIMIZATIONS + + BUG FIXES + + MAPREDUCE-4647. We should only unjar jobjar if there is a lib directory + in it. (Robert Evans via tgraves) + + MAPREDUCE-4691. Historyserver can report "Unknown job" after RM says job + has completed (Robert Joseph Evans via jlowe) + + MAPREDUCE-4689. JobClient.getMapTaskReports on failed job results in NPE + (jlowe via bobby) + +Release 0.23.3 + + INCOMPATIBLE CHANGES + + MAPREDUCE-4072. User set java.library.path seems to overwrite default + creating problems native lib loading (Anupam Seth via bobby) + + MAPREDUCE-3812. Lower default allocation sizes, fix allocation + configurations and document them (Harsh J via bobby) + + NEW FEATURES + + IMPROVEMENTS + + MAPREDUCE-4059. The history server should have a separate pluggable + storage/query interface. (Robert Evans via tgraves) + + MAPREDUCE-3942. Randomize master key generation for + ApplicationTokenSecretManager and roll it every so often. (Vinod Kumar + Vavilapalli via sseth) + + MAPREDUCE-4151. RM scheduler web page should filter apps to those that + are relevant to scheduling (Jason Lowe via tgraves) + + MAPREDUCE-4134. Remove references of mapred.child.ulimit etc. since they + are not being used any more (Ravi Prakash via bobby) + + MAPREDUCE-3972. Fix locking and exception issues in JobHistory server. + (Robert Joseph Evans via sseth) + + MAPREDUCE-4161. create sockets consistently (Daryn Sharp via bobby) + + MAPREDUCE-4079. 
Allow MR AppMaster to limit ephemeral port range. + (bobby via tgraves) + + MAPREDUCE-4210. Expose listener address for WebApp (Daryn Sharp via bobby) + + MAPREDUCE-4162. Correctly set token service (Daryn Sharp via bobby) + + MAPREDUCE-4301. Dedupe some strings in MRAM for memory savings + (bobby via tgraves) + + MAPREDUCE-4267. mavenize pipes (tgraves via bobby) + + MAPREDUCE-4375. Show Configuration Tracability in MR UI (bobby + via tgraves) + + MAPREDUCE-4569. Fixed TestHsWebServicesJobsQuery to pass on JDK7 by not + depending on test order. (Thomas Graves via vinodkv) + + OPTIMIZATIONS + + MAPREDUCE-3850. Avoid redundant calls for tokens in TokenCache (Daryn + Sharp via bobby) + + BUG FIXES + + MAPREDUCE-4092. commitJob Exception does not fail job (Jon Eagles via + bobby) + + MAPREDUCE-4089. Hung Tasks never time out. (Robert Evans via tgraves) + + MAPREDUCE-4024. RM webservices can't query on finalStatus (Tom Graves + via bobby) + + MAPREDUCE-4060. Multiple SLF4J binding warning (Jason Lowe via bobby) + + MAPREDUCE-3983. TestTTResourceReporting can fail, and should just be + deleted (Ravi Prakash via bobby) + + MAPREDUCE-4012 Hadoop Job setup error leaves no useful info to users + (when LinuxTaskController is used). (tgraves) + + MAPREDUCE-4062. AM Launcher thread can hang forever (tgraves via bobby) + + MAPREDUCE-3988. mapreduce.job.local.dir doesn't point to a single directory + on a node. (Eric Payne via bobby) + + MAPREDUCE-3999. Tracking link gives an error if the AppMaster hasn't + started yet (Ravi Prakash via bobby) + + MAPREDUCE-4020. Web services returns incorrect JSON for deep queue tree + (Anupam Seth via tgraves) + + MAPREDUCE-3672. Killed maps shouldn't be counted towards + JobCounter.NUM_FAILED_MAPS. (Anupam Seth via tgraves) + + MAPREDUCE-3682 Tracker URL says AM tasks run on localhost. + (Ravi Prakash via tgraves) + + MAPREDUCE-3082. Archive command take wrong path for input file with current + directory (John George via bobby) + + MAPREDUCE-3650. testGetTokensForHftpFS() fails (Ravi Prakash via bobby) + + MAPREDUCE-3621. TestDBJob and TestDataDrivenDBInputFormat ant tests fail + (Ravi Prakash via tgraves) + + MAPREDUCE-4073. CS assigns multiple off-switch containers when using + multi-level-queues (Siddharth Seth via bobby) + + MAPREDUCE-4051. Remove the empty hadoop-mapreduce-project/assembly/all.xml + file (Ravi Prakash via bobby) + + MAPREDUCE-4117. mapred job -status throws NullPointerException (Devaraj K + via bobby) + + MAPREDUCE-4099. ApplicationMaster may fail to remove staging directory + (Jason Lowe via bobby) + + MAPREDUCE-4017. Add jobname to jobsummary log (tgraves and Koji Noguchi + via bobby) + + MAPREDUCE-4040. History links should use hostname rather than IP address. + (Bhallamudi Venkata Siva Kamesh via sseth) + + MAPREDUCE-4099 amendment. ApplicationMaster will remove staging directory + after the history service is stopped. (Jason Lowe via sseth) + + MAPREDUCE-3932. Fix the TaskAttempt state machine to handle + CONTIANER_LAUNCHED and CONTIANER_LAUNCH_FAILED events in additional + states. (Robert Joseph Evans via sseth) + + MAPREDUCE-4140. mapreduce classes incorrectly importing + "clover.org.apache.*" classes. (Patrick Hunt via tomwhite) + + MAPREDUCE-4050. For tasks without assigned containers, changes the node + text on the UI to N/A instead of a link to null. (Bhallamudi Venkata Siva + Kamesh via sseth) + + MAPREDUCE-4128. AM Recovery expects all attempts of a completed task to + also be completed. (Bikas Saha via bobby) + + MAPREDUCE-4144. 
Fix a NPE in the ResourceManager when handling node + updates. (Jason Lowe via sseth) + + MAPREDUCE-4156. ant build fails compiling JobInProgress (tgraves) + + MAPREDUCE-4160. some mrv1 ant tests fail with timeout - due to 4156 + (tgraves) + + MAPREDUCE-4074. Client continuously retries to RM When RM goes down + before launching Application Master (xieguiming via tgraves) + + MAPREDUCE-4159. Job is running in Uber mode after setting + "mapreduce.job.ubertask.maxreduces" to zero (Devaraj K via bobby) + + MAPREDUCE-4165. Committing is misspelled as commiting in task logs + (John Eagles via bobby) + + MAPREDUCE-4129. Lots of unneeded counters log messages (Ahmed Radwan via + bobby) + + MAPREDUCE-3947. yarn.app.mapreduce.am.resource.mb not documented + (Devaraj K via bobby) + + MAPREDUCE-4190. Improve web UI for task attempts userlog link (Tom Graves + via bobby) + + MAPREDUCE-4133. MR over viewfs is broken (John George via bobby) + + MAPREDUCE-4194. ConcurrentModificationError in DirectoryCollection + (Jonathan Eagles via bobby) + + MAPREDUCE-3613. web service calls header contains 2 content types + (tgraves) + + MAPREDUCE-4169. Container Logs appear in unsorted order (Jonathan Eagles + via bobby) + + MAPREDUCE-4189. TestContainerManagerSecurity is failing (Devaraj K via + bobby) + + MAPREDUCE-4209. junit dependency in hadoop-mapreduce-client is missing + scope test (Radim Kolar via bobby) + + MAPREDUCE-4206. Sorting by Last Health-Update on the RM nodes page sorts + does not work correctly (Jonathon Eagles via tgraves) + + MAPREDUCE-4212. TestJobClientGetJob sometimes fails + (Daryn Sharp via tgraves) + + MAPREDUCE-4211. Error conditions (missing appid, appid not found) are + masked in the RM app page (Jonathan Eagles via bobby) + + MAPREDUCE-4163. consistently set the bind address (Daryn Sharp via bobby) + + MAPREDUCE-4048. NullPointerException exception while accessing the + Application Master UI (Devaraj K via bobby) + + MAPREDUCE-4220. RM apps page starttime/endtime sorts are incorrect + (Jonathan Eagles via bobby) + + MAPREDUCE-4226. ConcurrentModificationException in FileSystemCounterGroup. + (tomwhite) + + MAPREDUCE-4215. RM app page shows 500 error on appid parse error + (Jonathon Eagles via tgraves) + + MAPREDUCE-4237. TestNodeStatusUpdater can fail if localhost has a domain + associated with it (bobby) + + MAPREDUCE-4233. NPE can happen in RMNMNodeInfo. (bobby) + + MAPREDUCE-4238. mavenize data_join. (tgraves) + + MAPREDUCE-4102. job counters not available in Jobhistory webui for + killed jobs (Bhallamudi Venkata Siva Kamesh via tgraves) + + MAPREDUCE-3543. Mavenize Gridmix. (tgraves) + + MAPREDUCE-4197. Include the hsqldb jar in the hadoop-mapreduce tar + file (Ravi Prakash via tgraves) + + MAPREDUCE-4269. documentation: Gridmix has javadoc warnings in + StressJobFactory (Jonathon Eagles via tgraves). + + MAPREDUCE-3870. Invalid App Metrics + (Bhallamudi Venkata Siva Kamesh via tgraves). + + MAPREDUCE-4152. map task left hanging after AM dies trying to connect to RM + (Tom Graves via bobby) + + MAPREDUCE-4297. Usersmap file in gridmix should not fail on empty lines + (Ravi Prakash via bobby) + + MAPREDUCE-4302. NM goes down if error encountered during log aggregation + (Daryn Sharp via bobby) + + MAPREDUCE-3350. Per-app RM page should have the list of application-attempts + like on the app JHS page (Jonathon Eagles via tgraves) + + MAPREDUCE-3842. Stop webpages from automatic refreshing (tgraves) + + MAPREDUCE-3927. 
Shuffle hang when set map.failures.percent + (Bhallamudi Venkata Siva Kamesh via tgraves) + + MAPREDUCE-4311. Capacity scheduler.xml does not accept decimal values for + capacity and maximum-capacity settings (Karthik Kambatla via tgraves) + + MAPREDUCE-4341. add types to capacity scheduler properties documentation + (Karthik Kambatla via tgraves) + + MAPREDUCE-4270. Move the data_join test classes to the correct path. + (Thomas Graves via sseth) + + MAPREDUCE-3889. job client tries to use /tasklog interface, but that + doesn't exist anymore (Devaraj K via bobby) + + MAPREDUCE-4320. gridmix mainClass wrong in pom.xml (tgraves) + + MAPREDUCE-4295. RM crashes due to DNS issue (tgraves) + + MAPREDUCE-4228. mapreduce.job.reduce.slowstart.completedmaps is not working + properly (Jason Lowe via bobby) + + MAPREDUCE-4392. Counters.makeCompactString() changed behavior from 0.20 + (Jason Lowe via bobby) + + MAPREDUCE-4384. Race conditions in IndexCache (Kihwal Lee via tgraves) + + MAPREDUCE-4387. RM gets fatal error and exits during TestRM + (Kihwal Lee via tgraves) + + MAPREDUCE-4379. Node Manager throws java.lang.OutOfMemoryError: Java heap + space due to org.apache.hadoop.fs.LocalDirAllocator.contexts (Devaraj K + via bobby) + + MAPREDUCE-4402. TestFileInputFormat fails intermittently (Jason Lowe via + bobby) + + MAPREDUCE-4300. OOM in AM can turn it into a zombie. (Robert Evans via + tgraves) + + MAPREDUCE-4252. MR2 job never completes with 1 pending task (Tom White via + bobby) + + MAPREDUCE-3940. ContainerTokens should have an expiry interval. (Siddharth + Seth and Vinod Kumar Vavilapalli via vinodkv) + + MAPREDUCE-4419. ./mapred queue -info -showJobs displays all + the jobs irrespective of (Devaraj K via bobby) + + MAPREDUCE-4299. Terasort hangs with MR2 FifoScheduler (Tom White via + bobby) + + MAPREDUCE-4437. Race in MR ApplicationMaster can cause reducers to never be + scheduled (Jason Lowe via bobby) + + MAPREDUCE-4449. Incorrect MR_HISTORY_STORAGE property name in JHAdminConfig + (Ahmed Radwan via bobby) + + MAPREDUCE-4283. Display tail of aggregated logs by default (Jason Lowe via + bobby) + + MAPREDUCE-4448. Fix NM crash during app cleanup if aggregation didn't + init. (Jason Lowe via daryn) + + MAPREDUCE-3893. allow capacity scheduler configs maximum-applications and + maximum-am-resource-percent configurable on a per queue basis (tgraves via + bobby) + + MAPREDUCE-4467. IndexCache failures due to missing synchronization + (Kihwal Lee via tgraves) + + MAPREDUCE-4423. Potential infinite fetching of map output (Robert Evans + via tgraves) + + MAPREDUCE-4456. LocalDistributedCacheManager can get an + ArrayIndexOutOfBounds when creating symlinks (Robert Evans via tgraves) + + MAPREDUCE-4496. AM logs link is missing user name (Jason Lowe via bobby) + + MAPREDUCE-4493. Distibuted Cache Compatability Issues (Robert Evans + via tgraves) + + MAPREDUCE-4492. Configuring total queue capacity between 100.5 and 99.5 at + perticular level is sucessfull (Mayank Bansal via bobby) + + MAPREDUCE-4457. mr job invalid transition TA_TOO_MANY_FETCH_FAILURE at + FAILED (Robert Evans via tgraves) + + MAPREDUCE-4234. SortValidator.java is incompatible with multi-user or + parallel use (due to a /tmp file with static name) (Robert Evans via + jeagles) + + MAPREDUCE-4504. SortValidator writes to wrong directory (Robert Evans + via tgraves) + + MAPREDUCE-4503. Should throw InvalidJobConfException if duplicates found in + cacheArchives or cacheFiles (Robert Evans via jeagles) + + MAPREDUCE-3782. 
teragen terasort jobs fail when using webhdfs:// (Jason + Lowe via bobby) + + MAPREDUCE-4053. Counters group names deprecation is wrong, iterating over + group names deprecated names don't show up (Robert Evans via tgraves) + + MAPREDUCE-3506. Calling getPriority on JobInfo after parsing a history log + with JobHistoryParser throws a NullPointerException (Jason Lowe via bobby) + + MAPREDUCE-4570. ProcfsBasedProcessTree#constructProcessInfo() prints a + warning if procfsDir//stat is not found. (Ahmed Radwan via bobby) + + MAPREDUCE-4600. TestTokenCache.java from MRV1 no longer compiles (daryn + via bobby) + + MAPREDUCE-4612. job summary file permissions not set when its created + (tgraves via bobby) + + MAPREDUCE-4614. Simplify debugging a job's tokens (daryn via bobby) + + MAPREDUCE-4611. MR AM dies badly when Node is decommissioned (Robert + Evans via tgraves) + + MAPREDUCE-4604. In mapred-default, mapreduce.map.maxattempts & + mapreduce.reduce.maxattempts defaults are set to 4 as well as + mapreduce.job.maxtaskfailures.per.tracker. (Ravi Prakash via jeagles) + + MAPREDUCE-4633. history server doesn't set permissions on all subdirs + (tgraves via bobby) + + MAPREDUCE-4641. Exception in commitJob marks job as successful in job + history (Jason Lowe via bobby) + + MAPREDUCE-4549. Distributed cache conflicts breaks backwards compatability + (Robert Evans via tucu) + +Release 0.23.2 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + MAPREDUCE-3849. Change TokenCache's reading of the binary token file + (Daryn Sharp via bobby) + + MAPREDUCE-3854. Fixed and reenabled tests related to MR child JVM's + environmental variables in TestMiniMRChildTask. (Tom White via vinodkv) + + MAPREDUCE-3877 Add a test to formalise the current state transitions + of the yarn lifecycle. (stevel) + + MAPREDUCE-3866. Fixed the bin/yarn script to not print the command line + unnecessarily. (vinodkv) + + MAPREDUCE-3730. Modified RM to allow restarted NMs to be able to join the + cluster without waiting for expiry. (Jason Lowe via vinodkv) + + MAPREDUCE-2793. Corrected AppIDs, JobIDs, TaskAttemptIDs to be of correct + format on the web pages. (Bikas Saha via vinodkv) + + MAPREDUCE-3614. Fixed MR AM to close history file quickly and send a correct + final state to the RM when it is killed. (Ravi Prakash via vinodkv) + + MAPREDUCE-3497. Added docs for YARN CLI. (tgraves via acmurthy) + + MAPREDUCE-3954. Added new envs to separate heap size for different daemons + started via bin scripts. (Robert Joseph Evans via vinodkv) + + MAPREDUCE-4025. AM can crash if task attempt reports bogus progress value + (Jason Lowe via bobby) + + MAPREDUCE-4034. Unable to view task logs on history server with + mapreduce.job.acl-view-job=* (Jason Lowe and Siddarth Seth via bobby) + + MAPREDUCE-4043. Secret keys set in Credentials are not seen by tasks + (Jason Lowe via bobby) + + MAPREDUCE-3989. Cap space usage of default log4j rolling policy. + (Patrick Hunt via eli) + + OPTIMIZATIONS + + MAPREDUCE-3901. Modified JobHistory records in YARN to lazily load job and + task reports so as to improve UI response times. (Siddarth Seth via vinodkv) + + MAPREDUCE-2855. Passing a cached class-loader to ResourceBundle creator to + minimize counter names lookup time. (Siddarth Seth via vinodkv) + + MAPREDUCE-3944. Change the history jobs/ webservice to return partial job + info for a significant performance improvement. 
(Robert Joseph Evans via + sseth) + + BUG FIXES + + MAPREDUCE-3918 proc_historyserver no longer in command line arguments for + HistoryServer (Jon Eagles via bobby) + + MAPREDUCE-3862. Nodemanager can appear to hang on shutdown due to lingering + DeletionService threads (Jason Lowe via bobby) + + MAPREDUCE-3680. FifoScheduler web service rest API can print out invalid + JSON. (B Anil Kumar via tgraves) + + MAPREDUCE-3852. Test TestLinuxResourceCalculatorPlugin failing. (Thomas + Graves via mahadev) + + MAPREDUCE-3864. Fix cluster setup docs for correct SecondaryNameNode + HTTPS parameters. (todd) + + MAPREDUCE-3583. Change pid to String and stime to BigInteger in order to + avoid NumberFormatException caused by overflow. (Zhihong Yu via szetszwo) + + MAPREDUCE-3634. Fixed all daemons to crash instead of hanging around when + their EventHandlers get exceptions. (vinodkv) + + MAPREDUCE-3798. Fixed failing TestJobCleanup.testCusomCleanup() and moved it + to the maven build. (Ravi Prakash via vinodkv) + + MAPREDUCE-3884. PWD should be first in the classpath of MR tasks (tucu) + + MAPREDUCE-3878. Null user on filtered jobhistory job page (Jonathon Eagles + via tgraves) + + MAPREDUCE-3738. MM can hang during shutdown if AppLogAggregatorImpl thread + dies unexpectedly (Jason Lowe via sseth) + + MAPREDUCE-3904 Job history produced with mapreduce.cluster.acls.enabled + false can not be viewed with mapreduce.cluster.acls.enabled true + (Jonathon Eagles via tgraves) + + MAPREDUCE-3910. Fixed a bug in CapacityScheduler LeafQueue which was causing + app-submission to fail. (John George via vinodkv) + + MAPREDUCE-3686. Fixed two bugs in Counters because of which web app displays + zero counter values for framework counters. (Bhallamudi Venkata Siva Kamesh + via vinodkv) + + MAPREDUCE-3913. RM application webpage is unresponsive after 2000 jobs + (Jason Lowe via tgraves) + + MAPREDUCE-3922. Fixed build to not compile 32bit container-executor binary + by default on all platforms. (Hitesh Shah via vinodkv) + + MAPREDUCE-3790. Broken pipe on streaming job can lead to truncated output for + a successful job (Jason Lowe via bobby) + + MAPREDUCE-3816. capacity scheduler web ui bar graphs for used capacity wrong + (tgraves via bobby) + + MAPREDUCE-3930. Fixed an NPE while accessing the AM page/webservice for a + task attempt without an assigned container. (Robert Joseph Evans via + sseth) + + MAPREDUCE-3931. Changed PB implementation of LocalResource to take locks + so that race conditions don't fail tasks by inadvertantly changing the + timestamps. (Siddarth Seth via vinodkv) + + MAPREDUCE-3687. If AM dies before it returns new tracking URL, proxy + redirects to http://N/A/ and doesn't return error code (Ravi Prakash via + bobby) + + MAPREDUCE-3920. Revise yarn default port number selection + (Dave Thompson via tgraves) + + MAPREDUCE-3903. Add support for mapreduce admin users. (Thomas Graves via + sseth) + + MAPREDUCE-3706. Fix circular redirect error in job-attempts page. (bobby + via acmurthy) + + MAPREDUCE-3896. Add user information to the delegation token issued by the + history server. (Vinod Kumar Vavilapalli via sseth) + + MAPREDUCE-3792. Fix "bin/mapred job -list" to display all jobs instead of + only the jobs owned by the user. (Jason Lowe via vinodkv) + + MAPREDUCE-3929. Fixed output of 'bin/mapred queue -showacl' command to + clarify ACLs for users. (John George via acmurthy) + + MAPREDUCE-3960. Fix web-proxy to forward request to AM with configured + hostname or IP. 
(tgraves via acmurthy) + + MAPREDUCE-3897. Fixed computation of maxActiveAppsPerUser for queues by + using capacity and not max-capacity since we are already scaling it by + userLimitFactor. (Eric Payne via acmurthy) + + MAPREDUCE-3009. Fixed node link on JobHistory webapp. (chackaravarthy via + vinodkv) + + MAPREDUCE-3964. ResourceManager does not have JVM metrics (Jason Lowe via + bobby) + + MAPREDUCE-3034. Ensure NodeManager reboots itself on direction from + ResourceManager. (Devaraj K & Eric Payne via acmurthy) + + MAPREDUCE-3976. TestRMContainerAllocator failing (Jason Lowe via bobby) + + MAPREDUCE-3961. Map/ReduceSlotMillis computation incorrect (Siddharth Seth + via bobby) + + MAPREDUCE-3977. LogAggregationService leaks log aggregator objects + (Jason Lowe via bobby) + + MAPREDUCE-3975. Default value not set for Configuration parameter + mapreduce.job.local.dir (Eric Payne via bobby) + + MAPREDUCE-3982. Fixed FileOutputCommitter to not err out for an 'empty-job' + whose tasks don't write any outputs. (Robert Joseph Evans via vinodkv) + + MAPREDUCE-4005. Fixes broken AM container logs URL on ResourceManager + Application Info page. (Jason Lowe via sseth) + + MAPREDUCE-4006. History server container log web UI sometimes combines + stderr/stdout/syslog contents together (Siddharth Seth via tgraves) + + MAPREDUCE-4061. RM only has 1 AM launcher thread (tgraves via bobby) + +Release 0.23.1 - 2012-02-17 + + NEW FEATURES + + MAPREDUCE-778. Rumen Anonymizer. (Amar Kamat and Chris Douglas via amarrk) + + MAPREDUCE-3121. NodeManager should handle disk-failures (Ravi Gummadi via mahadev) + + MAPREDUCE-2863. Support web services for YARN and MR components. (Thomas + Graves via vinodkv) + + MAPREDUCE-3251. Network ACLs can prevent some clients to talk to MR ApplicationMaster. + (Anupam Seth via mahadev) + + MAPREDUCE-778. Rumen Anonymizer. (Amar Kamat and Chris Douglas via amarrk) + + IMPROVEMENTS + + MAPREDUCE-3481. [Gridmix] Improve Gridmix STRESS mode. (amarrk) + + MAPREDUCE-3597. [Rumen] Rumen should provide APIs to access all the + job-history related information. + + MAPREDUCE-3375. [Gridmix] Memory Emulation system tests. + (Vinay Thota via amarrk) + + MAPREDUCE-3840. JobEndNotifier doesn't use the proxyToUse during connecting + (Ravi Prakash via bobby) + + MAPREDUCE-3736. Variable substitution depth too large for fs.default.name + causes jobs to fail (ahmed via tucu). + + MAPREDUCE-2733. [Gridmix] Gridmix3 cpu emulation system tests. + (Vinay Thota via amarrk) + + MAPREDUCE-3297. Moved log related components into yarn-common so that + HistoryServer and clients can use them without depending on the + yarn-server-nodemanager module. (Siddharth Seth via vinodkv) + + MAPREDUCE-3336. Replaced guice internal.Preconditions api usage with the + public Preconditions API. (Thomas Graves via vinodkv) + + MAPREDUCE-3280. Removed the unnecessary job user-name configuration in + mapred-site.xml. (vinodkv) + + MAPREDUCE-3370. Fixed MiniMRYarnCluster and related tests to not use + a hard-coded path for the mr-app jar. (Ahmed Radwan via vinodkv) + + MAPREDUCE-3325. Improvements to CapacityScheduler doc. (Thomas Graves + via mahadev) + + MAPREDUCE-3341. Enhance logging of initalized queue limit values. + (Anupam Seth via mahadev) + + MAPREDUCE-3243. Invalid tracking URL for streaming jobs (Jonathan Eagles + via mahadev) + + MAPREDUCE-3331. Improvement to single node cluster setup documentation for + 0.23 (Anupam Seth via mahadev) + + MAPREDUCE-3102. 
Changed NodeManager to fail fast when LinuxContainerExecutor + has wrong configuration or permissions. (Hitesh Shah via vinodkv) + + MAPREDUCE-3415. improve MiniMRYarnCluster & DistributedShell JAR + resolution. (tucu) + + MAPREDUCE-3169. Create a new MiniMRCluster equivalent which only provides + client APIs cross MR1 and MR2. (Ahmed via tucu) + + MAPREDUCE-3373. Hadoop scripts unconditionally source + "$bin"/../libexec/hadoop-config.sh. (Bruno Mahé via tomwhite) + + MAPREDUCE-3372. HADOOP_PREFIX cannot be overridden. + (Bruno Mahé via tomwhite) + + MAPREDUCE-3411. Performance Upgrade for jQuery (Jonathan Eagles via + mahadev) + + MAPREDUCE-3371. Review and improve the yarn-api javadocs. (Ravi Prakash + via mahadev) + + MAPREDUCE-3238. Small cleanup in SchedulerApp. (Todd Lipcon via mahadev) + + MAPREDUCE-3413. RM web ui applications not sorted in any order by default. + (Jonathan Eagles via mahadev) + + MAPREDUCE-3045. Fixed UI filters to not filter on hidden title-numeric + sort fields. (Jonathan Eagles via sseth) + + MAPREDUCE-3448. TestCombineOutputCollector javac unchecked warning on mocked + generics (Jonathan Eagles via mahadev) + + MAPREDUCE-3169 amendment. Deprecate MiniMRCluster. (Ahmed Radwan via + sseth) + + MAPREDUCE-3369. Migrate MR1 tests to run on MR2 using the new interfaces + introduced in MAPREDUCE-3169. (Ahmed Radwan via tomwhite) + + MAPREDUCE-3518. mapred queue -info -showJobs throws NPE. + (Jonathan Eagles via mahadev) + + MAPREDUCE-3391. Making a trivial change to correct a log message in + DistributedShell app's AM. (Subroto Sanyal via vinodkv) + + MAPREDUCE-3547. Added a bunch of unit tests for the the RM/NM webservices. + (Thomas Graves via acmurthy) + + MAPREDUCE-3610. Remove use of the 'dfs.block.size' config for default block + size fetching. Use FS#getDefaultBlocksize instead. (Sho Shimauchi via harsh) + + MAPREDUCE-3478. Cannot build against ZooKeeper 3.4.0. (Tom White via mahadev) + + MAPREDUCE-3528. Fixed TaskHeartBeatHandler to use a new configuration + for the thread loop interval separate from task-timeout configuration + property. (Siddharth Seth via vinodkv) + + MAPREDUCE-3312. Modified MR AM to not send a stop-container request for + a container that isn't launched at all. (Robert Joseph Evans via vinodkv) + + MAPREDUCE-3382. Enhanced MR AM to use a proxy to ping the job-end + notification URL. (Ravi Prakash via vinodkv) + + MAPREDUCE-3299. Added AMInfo table to the MR AM job pages to list all the + job-attempts when AM restarts and recovers. (Jonathan Eagles via vinodkv) + + MAPREDUCE-3251. Network ACLs can prevent some clients to talk to MR AM. + Improved the earlier patch to not to JobHistoryServer repeatedly. + (Anupam Seth via vinodkv) + + MAPREDUCE-3553. Add support for data returned when exceptions thrown from web + service apis to be in either xml or in JSON. (Thomas Graves via mahadev) + + MAPREDUCE-3641. Making CapacityScheduler more conservative so as to + assign only one off-switch container in a single scheduling + iteration. (Arun C Murthy via vinodkv) + + MAPREDUCE-3692. yarn-resourcemanager out and log files can get big. (eli) + + MAPREDUCE-3710. Improved FileInputFormat to return better locality for the + last split. (Siddarth Seth via vinodkv) + + MAPREDUCE-2765. DistCp Rewrite. (Mithun Radhakrishnan via mahadev) + + MAPREDUCE-3737. The Web Application Proxy's is not documented very well. + (Robert Evans via mahadev) + + MAPREDUCE-3699. Increased RPC handlers for all YARN servers to reasonable + values for working at scale. 
(Hitesh Shah via vinodkv) + + MAPREDUCE-3693. Added mapreduce.admin.user.env to mapred-default.xml. + (Roman Shapshonik via acmurthy) + + MAPREDUCE-3732. Modified CapacityScheduler to use only users with pending + requests for computing user-limits. (Arun C Murthy via vinodkv) + + MAPREDUCE-3679. AM logs and others should not automatically refresh after every 1 + second. (Vinod KV via mahadev) + + MAPREDUCE-3754. Modified RM UI to filter applications based on state of the + applications. (vinodkv) + + MAPREDUCE-3774. Moved yarn-default.xml to hadoop-yarn-common from + hadoop-server-common. (Mahadev Konar via vinodkv) + + MAPREDUCE-3771. Un-deprecated the old mapred apis, port of MAPREDUCE-1735. + (acmurthy) + + MAPREDUCE-3784. Fixed CapacityScheduler so that maxActiveApplications and + maxActiveApplicationsPerUser per queue are not too low for small + clusters. (Arun C Murthy via vinodkv) + + OPTIMIZATIONS + + MAPREDUCE-3567. Extraneous JobConf objects in AM heap. (Vinod Kumar + Vavilapalli via sseth) + + MAPREDUCE-3399. Modifying ContainerLocalizer to send a heartbeat to NM + immediately after downloading a resource instead of always waiting for a + second. (Siddarth Seth via vinodkv) + + MAPREDUCE-3568. Optimized Job's progress calculations in MR AM. (vinodkv) + + MAPREDUCE-3569. TaskAttemptListener holds a global lock for all + task-updates. (Vinod Kumar Vavilapalli via sseth) + + MAPREDUCE-3511. Removed a multitude of cloned/duplicate counters in the AM + thereby reducing the AM heap size and preventing full GCs. (vinodkv) + + MAPREDUCE-3618. Fixed TaskHeartbeatHandler to not hold a global lock for all + task-updates. (Siddarth Seth via vinodkv) + + MAPREDUCE-3512. Batching JobHistory flushing to DFS so that we don't flush + for every event slowing down AM. (Siddarth Seth via vinodkv) + + MAPREDUCE-3718. Change default AM heartbeat interval to 1 second. (Hitesh + Shah via sseth) + + MAPREDUCE-3360. Added information about lost/rebooted/decommissioned nodes + on the webapps. (Bhallamudi Venkata Siva Kamesh and Jason Lowe via vinodkv) + + MAPREDUCE-3756. Made single shuffle limit configurable. (Hitesh Shah via + acmurthy) + + MAPREDUCE-3811. Made jobclient-to-AM retries configurable. (sseth via + acmurthy) + + BUG FIXES + + MAPREDUCE-2784. [Gridmix] Bug fixes in ExecutionSummarizer and + ResourceUsageMatcher. (amarrk) + + MAPREDUCE-3194. "mapred mradmin" command is broken in mrv2 + (Jason Lowe via bobby) + + MAPREDUCE-3462. Fix Gridmix JUnit testcase failures. + (Ravi Prakash and Ravi Gummadi via amarrk) + + MAPREDUCE-2950. [Rumen] Fixed TestUserResolve. (Ravi Gummadi via amarrk) + + MAPREDUCE-3412. Fix 'ant docs'. (amarrk) + + MAPREDUCE-3346 [Rumen] LoggedTaskAttempt#getHostName() returns null. + (amarrk) + + MAPREDUCE-3221. Reenabled the previously ignored test in TestSubmitJob + and fixed bugs in it. (Devaraj K via vinodkv) + + MAPREDUCE-3215. Reenabled and fixed bugs in the failing test + TestNoJobSetupCleanup. (Hitesh Shah via vinodkv) + + MAPREDUCE-3219. Reenabled and fixed bugs in the failing test + TestDelegationToken. (Hitesh Shah via vinodkv) + + MAPREDUCE-3217. Reenabled and fixed bugs in the failing ant test + TestAuditLogger. (Devaraj K via vinodkv) + + MAPREDUCE-3291. App fail to launch due to delegation token not + found in cache (Robert Evans via mahadev) + + MAPREDUCE-3344. o.a.h.mapreduce.Reducer since 0.21 blindly casts to + ReduceContext.ValueIterator. (Brock Noland via tomwhite) + + MAPREDUCE-3342. Fixed JobHistoryServer to also show the job's queue + name. 
(Jonathan Eagles via vinodkv) + + MAPREDUCE-3345. Fixed a race condition in ResourceManager that was causing + TestContainerManagerSecurity to fail sometimes. (Hitesh Shah via vinodkv) + + MAPREDUCE-3368. Fixed test compilation. (Hitesh Shah via vinodkv) + + MAPREDUCE-3333. Fixed bugs in ContainerLauncher of MR AppMaster due to + which per-container connections to NodeManager were lingering long enough + to hit the ulimits on number of processes. (vinodkv) + + MAPREDUCE-3392. Fixed Cluster's getDelegationToken's API to return null + when there isn't a supported token. (John George via vinodkv) + + MAPREDUCE-3379. Fixed LocalResourceTracker in NodeManager to remove deleted + cache entries correctly. (Siddharth Seth via vinodkv) + + MAPREDUCE-3324. Not All HttpServer tools links (stacks,logs,config,metrics) are + accessible through all UI servers (Jonathan Eagles via mahadev) + + MAPREDUCE-3355. Fixed MR AM's ContainerLauncher to handle node-command + timeouts correctly. (vinodkv) + + MAPREDUCE-3407. Fixed pom files to refer to the correct MR app-jar needed + by the integration tests. (Hitesh Shah via vinodkv) + + MAPREDUCE-3437. Fix examples pom to refer to the correct 0.23 snapshot + version. (Jonathan Eagles via todd) + + MAPREDUCE-3434. Nightly build broken (Hitesh Shah via mahadev) + + MAPREDUCE-3447. mapreduce examples not working (mahadev) + + MAPREDUCE-3444. trunk/0.23 builds broken (Hitesh Shah via mahadev) + + MAPREDUCE-3454. [Gridmix] TestDistCacheEmulation is broken (Hitesh Shah + via mahadev) + + MAPREDUCE-3408. yarn-daemon.sh unconditionnaly sets yarn.root.logger + (Bruno Mahe via mahadev) + + MAPREDUCE-3329. Fixed CapacityScheduler to ensure maximum-capacity cannot + be lesser than capacity for any queue. (acmurthy) + + MAPREDUCE-3464. mapreduce jsp pages missing DOCTYPE. (Dave Vronay via mattf) + + MAPREDUCE-3265. Removed debug logs during job submission to LOG.debug to + cut down noise. (acmurthy) + + MAPREDUCE-3468. Changed ant based infrastructure to use 0.23.1 version. + (sseth via acmurthy) + + MAPREDUCE-3433. Finding counters by legacy group name returns empty + counters. (tomwhite) + + MAPREDUCE-3450. NM port info no longer available in JobHistory. + (Siddharth Seth via mahadev) + + MAPREDUCE-3477. Hadoop site documentation cannot be built anymore. + (jeagles via tucu) + + MAPREDUCE-3488. Streaming jobs are failing because the main class + isnt set in the pom files. (mahadev) + + MAPREDUCE-3463. Second AM fails to recover properly when first AM is killed with + java.lang.IllegalArgumentException causing lost job. (Siddharth Seth via mahadev) + + MAPREDUCE-3452. fifoscheduler web ui page always shows 0% used for the queue. + (Jonathan Eagles via mahadev) + + MAPREDUCE-3443. JobClient and Job should function in the context of the + UGI which created them. (Mahadev Konar via sseth) + + MAPREDUCE-3460. MR AM can hang if containers are allocated on a node + blacklisted by the AM. (Hitesh Shah and Robert Joseph Evans via sseth) + + MAPREDUCE-3453. RM web ui application details page shows RM cluster about + information. (Jonathan Eagles via sseth) + + MAPREDUCE-3479. JobClient#getJob cannot find local jobs. (tomwhite) + + MAPREDUCE-3500. MRJobConfig creates an LD_LIBRARY_PATH using the platform ARCH. (tucu) + + MAPREDUCE-3456. $HADOOP_PREFIX/bin/yarn should set defaults for + $HADOOP_*_HOME (Eric Payne via mahadev) + + MAPREDUCE-3458. Fix findbugs warnings in hadoop-examples. (Devaraj K + via mahadev) + + MAPREDUCE-3485. 
DISKS_FAILED -101 error code should be defined in same location as + ABORTED_CONTAINER_EXIT_STATUS. (Ravi Gummadi via mahadev) + + MAPREDUCE-3389. MRApps loads the 'mrapp-generated-classpath' file with + classpath from the build machine. (tucu) + + MAPREDUCE-3496. Fixed client to print queue acls in consistent order. + (Jonathan Eagles via acmurthy) + + MAPREDUCE-3147. Handle leaf queues with the same name properly. + (Ravi Prakash via mahadev) + + MAPREDUCE-3327. RM web ui scheduler link doesn't show correct max value + for queues (Anupam Seth via mahadev) + + MAPREDUCE-3513. Capacity Scheduler web UI has a spelling mistake for Memory. + (chackaravarthy via mahadev) + + MAPREDUCE-3519. Fixed a deadlock in NodeManager LocalDirectories's handling + service. (Ravi Gummadi via vinodkv) + + MAPREDUCE-3527. Fix minor API incompatibilities between 1.0 and 0.23. + (tomwhite) + + MAPREDUCE-3328. mapred queue -list output inconsistent and missing child + queues. (Ravi Prakash via mahadev) + + MAPREDUCE-3510. Capacity Scheduler inherited ACLs not displayed by mapred queue + -showacls (Jonathan Eagles via mahadev) + + MAPREDUCE-3537. Fix race condition in DefaultContainerExecutor which led + to container localization occuring in wrong directories. (acmurthy) + + MAPREDUCE-3542. Support "FileSystemCounter" legacy counter group name for + compatibility. (tomwhite) + + MAPREDUCE-3426. Fixed MR AM in uber mode to write map intermediate outputs + in the correct directory to work properly in secure mode. (Hitesh Shah via + vinodkv) + + MAPREDUCE-3544. gridmix build is broken, requires hadoop-archives to be + added as ivy dependency. (tucu) + + MAPREDUCE-3557. MR1 test fail to compile because of missing hadoop-archives + dependency. (tucu) + + MAPREDUCE-3541. Fix broken TestJobQueueClient test. (Ravi Prakash via + mahadev) + + MAPREDUCE-3398. Fixed log aggregation to work correctly in secure mode. + (Siddharth Seth via vinodkv) + + MAPREDUCE-3530. Fixed an NPE occuring during scheduling in the + ResourceManager. (Arun C Murthy via vinodkv) + + MAPREDUCE-3484. Fixed JobEndNotifier to not get interrupted before completing + all its retries. (Ravi Prakash via vinodkv) + + MAPREDUCE-3531. Fixed a race in ContainerTokenSecretManager. (Robert Joseph + Evans via sseth) + + MAPREDUCE-3560. TestRMNodeTransitions is failing on trunk. + (Siddharth Seth via mahadev) + + MAPREDUCE-3487. Fixed JobHistory web-UI to display links to single task's + counters' page. (Jason Lowe via vinodkv) + + MAPREDUCE-3564. Fixed failures in TestStagingCleanup and TestJobEndNotifier + tests. (Siddharth Seth via vinodkv) + + MAPREDUCE-3422. Counter display names are not being picked up. (Jonathan + Eagles via sseth) + + MAPREDUCE-3366. Mapreduce component should use consistent directory structure + layout as HDFS/common (Eric Yang via mahadev) + + MAPREDUCE-3387. Fixed AM's tracking URL to always go through the proxy, even + before the job started, so that it works properly with oozie throughout + the job execution. (Robert Joseph Evans via vinodkv) + + MAPREDUCE-3579. ConverterUtils shouldn't include a port in a path from a url + without a port. (atm via harsh) + + MAPREDUCE-3563. Fixed LocalJobRunner to work correctly with new mapreduce + apis. (acmurthy) + + MAPREDUCE-3376. Fixed Task to ensure it passes reporter to combiners using + old MR api. (Subroto Sanyal via acmurthy) + + MAPREDUCE-3339. Fixed MR AM to stop considering node blacklisting after the + number of nodes blacklisted crosses a threshold. 
(Siddharth Seth via vinodkv) + + MAPREDUCE-3588. Fixed bin/yarn which was broken by MAPREDUCE-3366 so that + yarn daemons can start. (Arun C Murthy via vinodkv) + + MAPREDUCE-3349. Log rack-name in JobHistory for unsuccessful tasks. (Amar + Kamat and Devaraj K via sseth) + + MAPREDUCE-3586. Modified CompositeService to avoid duplicate stop operations + thereby solving race conditions in MR AM shutdown. (vinodkv) + + MAPREDUCE-3604. Fixed streaming to use new mapreduce.framework.name to + check for local mode. (acmurthy) + + MAPREDUCE-3521. Fixed streaming to ensure it doesn't silently ignore + unknown arguments. (Robert Evans via acmurthy) + + MAPREDUCE-3522. Ensure queues inherit ACLs from parent if they aren't + explicitly specified. (Jonathan Eagles via acmurthy) + + MAPREDUCE-3608. Fixed compile issue with MAPREDUCE-3522. (mahadev via + acmurthy) + + MAPREDUCE-3490. Fixed MapReduce AM to count failed maps also towards Reduce + ramp up. (Sharad Agarwal and Arun C Murthy via vinodkv) + + MAPREDUCE-3529. TokenCache does not cache viewfs credentials correctly + (sseth) + + MAPREDUCE-3595. Add missing TestCounters#testCounterValue test from branch + 1 to 0.23 (Tom White via sseth) + + MAPREDUCE-3566. Fixed MR AM to construct CLC only once across all tasks. + (vinodkv via acmurthy) + + MAPREDUCE-3572. Moved AM event dispatcher to a separate thread for + performance reasons. (vinodkv via acmurthy) + + MAPREDUCE-3615. Fix some ant test failures. (Thomas Graves via sseth) + + MAPREDUCE-1744. DistributedCache creates its own FileSytem instance when + adding a file/archive to the path. (Dick King via tucu) + + MAPREDUCE-3326. Added detailed information about queue's to the + CapacityScheduler web-ui. (Jason Lowe via acmurthy) + + MAPREDUCE-3548. Added more unit tests for MR AM & JHS web-services. + (Thomas Graves via acmurthy) + + MAPREDUCE-3617. Removed wrong default value for + yarn.resourcemanager.principal and yarn.nodemanager.principal. (Jonathan + Eagles via acmurthy) + + MAPREDUCE-3624. Remove unnecessary dependency on JDK's tools.jar. (mahadev + via acmurthy) + + MAPREDUCE-3616. Thread pool for launching containers in MR AM not + expanding as expected. (vinodkv via sseth) + + MAPREDUCE-3639. Fixed TokenCache to work with absent FileSystem canonical + service-names. (Siddharth Seth via vinodkv) + + MAPREDUCE-3380. Token infrastructure for running clients which are not kerberos + authenticated. (mahadev) + + MAPREDUCE-3648. TestJobConf failing. (Thomas Graves via mahadev) + + MAPREDUCE-3651. TestQueueManagerRefresh fails. (Thomas Graves via mahadev) + + MAPREDUCE-3645. TestJobHistory fails. (Thomas Graves via mahadev) + + MAPREDUCE-3652. org.apache.hadoop.mapred.TestWebUIAuthorization.testWebUIAuthorization + fails. (Thomas Graves via mahadev) + + MAPREDUCE-3625. CapacityScheduler web-ui display of queue's used capacity is broken. + (Jason Lowe via mahadev) + + MAPREDUCE-3596. Fix scheduler to handle cleaned up containers, which NMs + may subsequently report as running. (Vinod Kumar Vavilapalli via sseth) + + MAPREDUCE-3656. Fixed a race condition in MR AM which is failing the sort + benchmark consistently. (Siddarth Seth via vinodkv) + + MAPREDUCE-3532. Modified NM to report correct http address when an ephemeral + web port is configured. (Bhallamudi Venkata Siva Kamesh via vinodkv) + + MAPREDUCE-3404. Corrected MR AM to honor speculative configuration and enable + speculating either maps or reduces. (Eric Payne via vinodkv) + + MAPREDUCE-3664. 
Federation Documentation has incorrect configuration example. + (Brandon Li via jitendra) + + MAPREDUCE-3649. Job End notification gives an error on calling back. + (Ravi Prakash via mahadev) + + MAPREDUCE-3657. State machine visualize build fails. (Jason Lowe + via mahadev) + + MAPREDUCE-2450. Fixed a corner case with interrupted communication threads + leading to a long timeout in Task. (Rajesh Balamohan via acmurthy) + + MAPREDUCE-3669. Allow clients to talk to MR HistoryServer using both + delegation tokens and kerberos. (mahadev via acmurthy) + + MAPREDUCE-3684. LocalDistributedCacheManager does not shut down its thread + pool (tomwhite) + + MAPREDUCE-3582. Move successfully passing MR1 tests to MR2 maven tree. + (ahmed via tucu) + + MAPREDUCE-3698. Client cannot talk to the history server in secure mode. + (mahadev) + + MAPREDUCE-3689. RM web UI doesn't handle newline in job name. + (Thomas Graves via mahadev) + + MAPREDUCE-3701. Delete HadoopYarnRPC from 0.23 branch. + (mahadev) + + MAPREDUCE-3549. write api documentation for web service apis for RM, NM, + mapreduce app master, and job history server (Thomas Graves via mahadev) + + MAPREDUCE-3705. ant build fails on 0.23 branch. (Thomas Graves via + mahadev) + + MAPREDUCE-3691. webservices add support to compress response. + (Thomas Graves via mahadev) + + MAPREDUCE-3702. internal server error trying access application master + via proxy with filter enabled (Thomas Graves via mahadev) + + MAPREDUCE-3646. Remove redundant URL info from "mapred job" output. + (Jonathan Eagles via mahadev) + + MAPREDUCE-3681. Fixed computation of queue's usedCapacity. (acmurthy) + + MAPREDUCE-3505. yarn APPLICATION_CLASSPATH needs to be overridable. + (ahmed via tucu) + + MAPREDUCE-3714. Fixed EventFetcher and Fetcher threads to shut-down properly + so that reducers don't hang in corner cases. (vinodkv) + + MAPREDUCE-3712. The mapreduce tar does not contain the hadoop-mapreduce-client- + jobclient-tests.jar. (mahadev) + + MAPREDUCE-3717. JobClient test jar has missing files to run all the test programs. + (mahadev) + + MAPREDUCE-3630. Fixes a NullPointer exception while running TeraGen - if a + map is asked to generate 0 records. (Mahadev Konar via sseth) + + MAPREDUCE-3683. Fixed maxCapacity of queues to be product of parent + maxCapacities. (acmurthy) + + MAPREDUCE-3713. Fixed the way head-room is allocated to applications by + CapacityScheduler so that it deducts current-usage per user and not + per-application. (Arun C Murthy via vinodkv) + + MAPREDUCE-3721. Fixed a race in shuffle which caused reduces to hang. + (sseth via acmurthy) + + MAPREDUCE-3733. Add Apache License Header to hadoop-distcp/pom.xml. + (mahadev) + + MAPREDUCE-3735. Add distcp jar to the distribution (tar). + (mahadev) + + MAPREDUCE-3720. Changed bin/mapred job -list to not print job-specific + information not available at RM. (vinodkv via acmurthy) + + MAPREDUCE-3742. "yarn logs" command fails with ClassNotFoundException. + (Jason Lowe via mahadev) + + MAPREDUCE-3703. ResourceManager should provide node lists in JMX output. + (Eric Payne via mahadev) + + MAPREDUCE-3716. Fixing YARN+MR to allow MR jobs to be able to use + java.io.File.createTempFile to create temporary files as part of their + tasks. (Jonathan Eagles via vinodkv) + + MAPREDUCE-3748. Changed a log in CapacityScheduler.nodeUpdate to debug. + (ramya via acmurthy) + + MAPREDUCE-3764. Fixed resource usage metrics for queues and users. + (acmurthy) + + MAPREDUCE-3749. ConcurrentModificationException in counter groups. 
+ (tomwhite) + + MAPREDUCE-3762. Fixed default CapacityScheduler configs. (mahadev via + acmurthy) + + MAPREDUCE-3499. New MiniMR does not setup proxyuser configuration + correctly, thus tests using doAs do not work. (johnvijoe via tucu) + + MAPREDUCE-3696. MR job via oozie does not work on hadoop 23. + (John George via mahadev) + + MAPREDUCE-3427. Fix streaming unit tests broken after mavenization. + (Hitesh Shah via acmurthy) + + MAPREDUCE-3640. Allow AMRecovery to work with partial JobHistory files. + (Arun C Murthy via sseth) + + MAPREDUCE-3752. Modified application limits to include queue max-capacities + besides the usual user limits. (Arun C Murthy via vinodkv) + + MAPREDUCE-3744. Fix the yarn logs command line. Improve error messages for + mapred job -logs. (Jason Lowe via sseth) + + MAPREDUCE-3780. Fixed a bug where applications killed before getting + activated were not getting cleaned up properly. (Hitesh Shah via acmurthy) + + MAPREDUCE-3708. Metrics: Incorrect Apps Submitted Count (Bhallamudi via + mahadev) + + MAPREDUCE-3727. jobtoken location property in jobconf refers to wrong + jobtoken file (tucu) + + MAPREDUCE-3711. Fixed MR AM recovery so that only single selected task + output is recovered and thus reduce the unnecessarily bloated recovery + time. (Robert Joseph Evans via vinodkv) + + MAPREDUCE-3760. Changed active nodes list to not contain unhealthy nodes + on the webUI and metrics. (vinodkv) + + MAPREDUCE-3417. Fixed job-access-controls to work with MR AM and + JobHistoryServer web-apps. (Jonathan Eagles via vinodkv) + + MAPREDUCE-3803. Fix broken build of raid contrib due to HDFS-2864. + (Ravi Prakash via suresh) + + MAPREDUCE-3791. can't build site in hadoop-yarn-server-common. + (mahadev) + + MAPREDUCE-3723. TestAMWebServicesJobs & TestHSWebServicesJobs + incorrectly asserting tests (Bhallamudi Venkata Siva Kamesh + via mahadev) + + MAPREDUCE-3795. "job -status" command line output is malformed. + (vinodkv via mahadev) + + MAPREDUCE-3759. ClassCastException thrown in -list-active-trackers when + there are a few unhealthy nodes (vinodkv via mahadev) + + MAPREDUCE-3775. Change MiniYarnCluster to escape special chars in testname. + (Hitesh Shah via mahadev) + + MAPREDUCE-3765. FifoScheduler does not respect yarn.scheduler.fifo.minimum- + allocation-mb setting (Hitesh Shah via mahadev) + + MAPREDUCE-3747. Initialize queue metrics upfront and added start/finish + time to RM Web-UI. (acmurthy) + + MAPREDUCE-3814. Fixed MRV1 compilation. (Arun C Murthy via vinodkv) + + MAPREDUCE-3810. Performance tweaks - reduced logging in AM and defined + hascode/equals for ResourceRequest & Priority. (vinodkv via acmurthy) + + MAPREDUCE-3813. Added a cache for resolved racks. (vinodkv via acmurthy) + + MAPREDUCE-3808. Fixed an NPE in FileOutputCommitter for jobs with maps + but no reduces. (Robert Joseph Evans via vinodkv) + + MAPREDUCE-3804. yarn webapp interface vulnerable to cross scripting attacks + (Dave Thompson via bobby) + + MAPREDUCE-3354. Changed scripts so that jobhistory server is started by + bin/mapred instead of bin/yarn. (Jonathan Eagles via acmurthy) + + MAPREDUCE-3809. Ensure that there is no needless sleep in Task at the end + of the task. (sseth via acmurthy) + + MAPREDUCE-3794. Support mapred.Task.Counter and mapred.JobInProgress.Counter + enums for compatibility (Tom White via mahadev) + + MAPREDUCE-3697. Support binary compatibility for Counters after + MAPREDUCE-901. (mahadev via acmurthy) + + MAPREDUCE-3817. 
Fixed bin/mapred to allow running of distcp and archive + jobs. (Arpit Gupta via acmurthy) + + MAPREDUCE-3709. TestDistributedShell is failing. (Hitesh Shah via + mahadev) + + MAPREDUCE-3436. JobHistory webapp address should use the host configured + in the jobhistory address. (Ahmed Radwan via sseth) + + MAPREDUCE-3815. Fixed MR AM to always use hostnames and never IPs when + requesting containers so that scheduler can give off data local containers + correctly. (Siddarth Seth via vinodkv) + + MAPREDUCE-3833. Fixed a bug in reinitiaziling of queues. (Jason Lowe via + acmurthy) + + MAPREDUCE-3826. Fixed a bug in RM web-ui which broke sorting. (Jonathan + Eagles via acmurthy) + + MAPREDUCE-3823. Ensure counters are calculated only once after a job + finishes. (Vinod Kumar Vavilapalli via sseth) + + MAPREDUCE-3827. Changed Counters to use ConcurrentSkipListMap for + performance. (vinodkv via acmurthy) + + MAPREDUCE-3822. Changed FS counter computation to use all occurences of + the same FS scheme, instead of randomly using one. (Mahadev Konar via + sseth) + + MAPREDUCE-3834. Changed MR AM to not add the same rack entry multiple times + into the container request table when multiple hosts for a split happen to + be on the same rack. (Siddarth Seth via vinodkv) + + MAPREDUCE-3828. Ensure that urls in single-node mode are correct. (sseth + via acmurthy) + + MAPREDUCE-3770. Zombie.getJobConf() results into NPE. (amarrk) + + MAPREDUCE-3843. Job summary log file found missing on the RM host + (Anupam Seth via tgraves) + + MAPREDUCE-3846. Addressed MR AM hanging issues during AM restart and then + the recovery. (vinodkv) + + MAPREDUCE-3802. Added test to validate that AM can crash multiple times and + still can recover successfully after MAPREDUCE-3846. (vinodkv) + + MAPREDUCE-3858. Task attempt failure during commit results in task never completing. + (Tom White via mahadev) + + MAPREDUCE-3856. Instances of RunningJob class givs incorrect job tracking + urls when mutiple jobs are submitted from same client jvm. (Eric Payne via + sseth) + + MAPREDUCE-3880. Changed LCE binary to be 32-bit. (acmurthy) + +Release 0.23.0 - 2011-11-01 + + INCOMPATIBLE CHANGES + + MAPREDUCE-2455. Remove deprecated JobTracker.State in favour of + JobTrackerStatus. (tomwhite) + + MAPREDUCE-2430. Remove mrunit contrib. (nigel via eli) + + MAPREDUCE-2606. Remove IsolationRunner. (Alejandro Abdelnur via eli) + + NEW FEATURES + + MAPREDUCE-2682. Add "mapred classpath" command to print classpath + for MR applications. (vinodkv via acmurthy) + + MAPREDUCE-2107. [Gridmix] Total heap usage emulation in Gridmix. + (Amar Kamat and Ravi Gummadi via amarrk) + + MAPREDUCE-2106. [Gridmix] Cumulative CPU usage emulation in Gridmix. + (amarrk) + + MAPREDUCE-2543. [Gridmix] High-Ram feature emulation in Gridmix. (amarrk) + + MAPREDUCE-2408. [Gridmix] Compression emulation in Gridmix. (amarrk) + + MAPREDUCE-2473. Add "mapred groups" command to query the server-side groups + resolved for a user. (Aaron T. Myers via todd) + + MAPREDUCE-461. Enable ServicePlugins for the JobTracker. + (Fredrik Hedberg via tomwhite) + + MAPREDUCE-2521. Create RPM and Debian packages for MapReduce. Changes + deployment layout to be consistent across the binary tgz, rpm, and deb. + (Eric Yang via omalley) + + MAPREDUCE-2323. Add metrics to the fair scheduler. (todd) + + MAPREDUCE-2037. Capture intermediate progress, CPU and memory usage for + tasks. (Dick King via acmurthy) + + MAPREDUCE-279. MapReduce 2.0. Merging MR-279 branch into trunk. 
Contributed by + Arun C Murthy, Christopher Douglas, Devaraj Das, Greg Roelofs, Jeffrey + Naisbitt, Josh Wills, Jonathan Eagles, Krishna Ramachandran, Luke Lu, Mahadev + Konar, Robert Evans, Sharad Agarwal, Siddharth Seth, Thomas Graves, and Vinod + Kumar Vavilapalli. + + MAPREDUCE-2930. Added the ability to be able to generate graphs from the + state-machine definitions. (Binglin Chang via vinodkv) + + MAPREDUCE-2719. Add a simple, DistributedShell, application to illustrate + alternate frameworks on YARN. (Hitesh Shah via acmurthy) + + MAPREDUCE-3104. Implemented Application-acls. (vinodkv) + + MAPREDUCE-2708. Designed and implemented MR Application Master recovery to + make MR AMs resume their progress after restart. (Sharad Agarwal via vinodkv) + + MAPREDUCE-2858. Added a WebApp Proxy for applications. (Robert Evans via + acmurthy) + + IMPROVEMENTS + + MAPREDUCE-2187. Reporter sends progress during sort/merge. (Anupam Seth via + acmurthy) + + MAPREDUCE-2365. Add counters to track bytes (read,written) via + File(Input,Output)Format. (Siddharth Seth via acmurthy) + + MAPREDUCE-2680. Display queue name in job client CLI. (acmurthy) + + MAPREDUCE-2679. Minor changes to sync trunk with MR-279 branch. (acmurthy) + + MAPREDUCE-2400. Remove Cluster's dependency on JobTracker via a + ServiceProvider for the actual implementation. (tomwhite via acmurthy) + + MAPREDUCE-2596. [Gridmix] Summarize Gridmix runs. (amarrk) + + MAPREDUCE-2563. [Gridmix] Add High-Ram emulation system tests to + Gridmix. (Vinay Kumar Thota via amarrk) + + MAPREDUCE-2104. [Rumen] Add Cpu, Memory and Heap usages to + TraceBuilder's output. (amarrk) + + MAPREDUCE-2554. [Gridmix] Add distributed cache emulation system tests + to Gridmix. (Vinay Kumar Thota via amarrk) + + MAPREDUCE-2543. [Gridmix] High-Ram feature emulation testcase. (amarrk) + + MAPREDUCE-2469. Task counters should also report the total heap usage of + the task. (Ravi Gummadi and Amar Ramesh Kamat via amarrk) + + MAPREDUCE-2544. [Gridmix] Add compression emulation system tests to + Gridmix. (Vinay Kumar Thota via amarrk) + + MAPREDUCE-2517. [Gridmix] Add system tests to Gridmix. + (Vinay Kumar Thota via amarrk) + + MAPREDUCE-2492. The new MapReduce API should make available task's + progress to the task. (amarrk) + + MAPREDUCE-2153. Bring in more job configuration properties in to the trace + file. (Rajesh Balamohan via amarrk) + + MAPREDUCE-1461. Feature to instruct rumen-folder utility to skip jobs worth + of specific duration. (Rajesh Balamohan via amarrk) + + MAPREDUCE-2172. Added test-patch.properties required by test-patch.sh + (nigel) + + MAPREDUCE-2156. Raid-aware FSCK. (Patrick Kling via dhruba) + + MAPREDUCE-2215. A more elegant FileSystem#listCorruptFileBlocks API + (RAID changes) (Patrick Kling via hairong) + + MAPREDUCE-1831. BlockPlacement policy for HDFS-RAID. + (Scott Chen via dhruba) + + MAPREDUCE-1906. Lower minimum heartbeat interval for TaskTracker + (Scott Carey and Todd Lipcon via todd) + + MAPREDUCE-1382. MRAsyncDiscService should tolerate missing local.dir. + (Zheng Shao and tomwhite via tomwhite) + + MAPREDUCE-2263. MapReduce side of HADOOP-6904: RPC compatibility. + (hairong) + + MAPREDUCE-1706. Log RAID recoveries on HDFS. (schen) + + MAPREDUCE-2334. Update BlockPlacementPolicyRaid for the new method + in BlockPlacementPolicy. (szetszwo) + + MAPREDUCE-2254. Allow setting of end-of-record delimiter for + TextInputFormat (Ahmed Radwan via todd) + + MAPREDUCE-1927. Unit test for HADOOP-6835 (concatenated gzip support). 
+ (Greg Roelofs via tomwhite) + + MAPREDUCE-2206. The task-cleanup tasks should be optional. (schen) + + MAPREDUCE-2225. MultipleOutputs should not require the use of 'Writable'. + (Harsh J Chouraria via tomwhite) + + MAPREDUCE-1811. Job.monitorAndPrintJob() should print status of the job + at completion. (Harsh J Chouraria via tomwhite) + + MAPREDUCE-993. bin/hadoop job -events <#-of-events> + help message is confusing. (Harsh J Chouraria via tomwhite) + + MAPREDUCE-2302. Add static factory methods in GaloisField. (schen) + + MAPREDUCE-2351. mapred.job.tracker.history.completed.location should + support an arbitrary filesystem URI. (tomwhite) + + MAPREDUCE-2239. BlockPlacementPolicyRaid should call getBlockLocations + only when necessary. (schen) + + MAPREDUCE-2331. Add coverage of task graph servlet to fair scheduler system + test. (todd) + + MAPREDUCE-2367. Allow using a file to exclude certain tests from build. + (todd) + + MAPREDUCE-2202. Generalize CLITest structure and interfaces to faciliate + upstream adoption (e.g. for web or system testing). (cos) + + MAPREDUCE-2420. JobTracker should be able to renew delegation token over + HTTP (Boris Shkolnik via jitendra) + + MAPREDUCE-2474. Add docs to the new API Partitioner on how to access the + Job Configuration. (Harsh J Chouraria via todd) + + MAPREDUCE-2475. Disable IPV6 for junit tests. (suresh srinivas via mahadev) + + MAPREDUCE-2422. Removed unused internal methods from DistributedCache. + (tomwhite) + + MAPREDUCE-2456. Log the reduce taskID and associated TaskTrackers with + failed fetch notifications in the JobTracker log. + (Jeffrey Naisbitt via cdouglas) + + MAPREDUCE-869. Documentation for config to set map/reduce task environment + (Alejandro Abdelnur via todd) + + MAPREDUCE-2410. Add entry to streaming FAQ about how streaming reducers + receive keys. (Harsh J Chouraria via todd) + + MAPREDUCE-2499. MR part of HADOOP-7291. (eli) + + MAPREDUCE-2497. Missing spaces in error messages. (eli) + + MAPREDUCE-2502. JobSubmitter should use mapreduce.job.maps instead of + its deprecated equivalent. (eli via todd) + + MAPREDUCE-2381. JobTracker instrumentation not consistent about error + handling. (Philip Zeyliger via tomwhite) + + MAPREDUCE-2449. Allow for command line arguments when performing + "Run on Hadoop" action in Eclipse plugin. (Jeff Zemerick via todd) + + MAPREDUCE-2483. Remove duplication of jars between Hadoop subprojects + from build artifacts. (Eric Yang via omalley) + + MAPREDUCE-2372. TaskLogAppender mechanism shouldn't be set up in + log4j.properties (todd) + + MAPREDUCE-2516. Rename webinterface.private.actions to + mapreduce.jobtracker.webinterface.trusted (Ari Rabkin via todd) + + MAPREDUCE-2459. Cache HAR filesystem metadata. (Mac Yang via mahadev) + + HADOOP-7259. Contrib modules should include the build.properties from + the enclosing hadoop directory. (omalley) + + MAPREDUCE-2494. Order distributed cache deletions by LRU. (Robert Joseph + Evans via cdouglas) + + MAPREDUCE-2452. Makes the cancellation of delegation tokens happen in a + separate thread. (ddas) + + HADOOP-7106. Reorganize project SVN layout to "unsplit" the projects. + (todd, nigel) + + MAPREDUCE-2249. Check the reflexive property of Counters objects when + comparing equality. (Devaraj K via todd) + + MAPREDUCE-2623. Update ClusterMapReduceTestCase to use + MiniDFSCluster.Builder (Harsh J Chouraria via eli) + + MAPREDUCE-2602. Allow setting of end-of-record delimiter for + TextInputFormat for the old API. 
(Ahmed Radwan via todd) + + MAPREDUCE-2705. Permits parallel multiple task launches. + (Thomas Graves via ddas) + + MAPREDUCE-2489. Jobsplits with random hostnames can make the queue + unusable (jeffrey naisbit via mahadev) + + MAPREDUCE-2854. update INSTALL with config necessary run mapred on yarn. + (thomas graves via mahadev) + + MAPREDUCE-2701. app/Job.java needs UGI for the user that launched it. + (Robert Evans via mahadev) + + MAPREDUCE-2652. Enabled multiple NMs to be runnable on a single node by + making shuffle service port to be truely configurable. (Robert Evans via + vinodkv) + + MAPREDUCE-2735. Add an applications summary log to ResourceManager. + (Thomas Graves via acmurthy) + + MAPREDUCE-2697. Enhance CapacityScheduler to cap concurrently running + applications per-queue & per-user. (acmurthy) + Configuration changes: + add yarn.capacity-scheduler.maximum-am-resource-percent + + MAPREDUCE-2774. Add startup message to ResourceManager & NodeManager on + startup. (Venu Gopala Rao via acmurthy) + + MAPREDUCE-2655. Add audit logs to ResourceManager and NodeManager. (Thomas + Graves via acmurthy) + + MAPREDUCE-2864. Normalize configuration variable names for YARN. (Robert + Evans via acmurthy) + + MAPREDUCE-2690. Web-page for FifoScheduler. (Eric Payne via acmurthy) + + MAPREDUCE-2711. Update TestBlockPlacementPolicyRaid for the new namesystem + and block management APIs. (szetszwo) + + MAPREDUCE-2933. Change allocate call to return ContainerStatus for + completed containers rather than Container. (acmurthy) + + MAPREDUCE-2675. Reformat JobHistory Server main page to be more + useful. (Robert Joseph Evans via vinodkv). + + MAPREDUCE-2896. Simplify all apis to in + org.apache.hadoop.yarn.api.records.* to be get/set only. Added javadocs to + all public records. (acmurthy) + + MAPREDUCE-2676. MR-279: JobHistory Job page needs reformatted. (Robert Evans via + mahadev) + + MAPREDUCE-2899. Replace major parts of ApplicationSubmissionContext with a + ContainerLaunchContext (Arun Murthy via mahadev) + + MAPREDUCE-2966. Added ShutDown hooks for MRV2 processes so that they can + gracefully exit. (Abhijit Suresh Shingate via vinodkv) + + MAPREDUCE-2672. MR-279: JobHistory Server needs Analysis this job. + (Robert Evans via mahadev) + + MAPREDUCE-2965. Streamlined the methods hashCode(), equals(), compareTo() + and toString() for all IDs. (Siddharth Seth via vinodkv) + + MAPREDUCE-2726. Added job-file to the AM and JobHistoryServer web + interfaces. (Jeffrey Naisbitt via vinodkv) + + MAPREDUCE-2880. Improve classpath-construction for mapreduce AM and + containers. (Arun C Murthy via vinodkv) + + MAPREDUCE-3055. Simplified ApplicationAttemptId passing to + ApplicationMaster via environment variable. (vinodkv) + + MAPREDUCE-3092. Removed a special comparator for JobIDs in JobHistory as + JobIDs are already comparable. (Devaraj K via vinodkv) + + MAPREDUCE-3099. Add docs for setting up a single node MRv2 cluster. + (mahadev) + + MAPREDUCE-3001. Added task-specific counters to AppMaster and JobHistory + web-UIs. (Robert Joseph Evans via vinodkv) + + MAPREDUCE-3098. Fixed RM and MR AM to report YarnApplicationState and + application's FinalStatus separately. (Hitesh Shah via vinodkv) + + MAPREDUCE-2889. Added documentation for writing new YARN applications. + (Hitesh Shah via acmurthy) + + MAPREDUCE-3134. Added documentation the CapacityScheduler. (acmurthy) + + MAPREDUCE-3013. Removed YarnConfiguration.YARN_SECURITY_INFO and its usage + as it doesn't affect security any more. 
(vinodkv) + + MAPREDUCE-2907. Changed log level for various messages in ResourceManager + from INFO to DEBUG. (Ravi Prakash via vinodkv) + + MAPREDUCE-2702. Added a new API in OutputCommitter for recovering + the outputs of tasks from a crashed job so as to support MR Application + Master recovery. (Sharad Agarwal and Arun C Murthy via vinodkv) + + MAPREDUCE-2738. Added the missing cluster level statistics on the RM web + UI. (Robert Joseph Evans via vinodkv) + + MAPREDUCE-2988. Reenabled TestLinuxContainerExecutor reflecting the + current NodeManager code. (Robert Joseph Evans via vinodkv) + + MAPREDUCE-3161. Improved some javadocs and fixed some typos in + YARN. (Todd Lipcon via vinodkv) + + MAPREDUCE-3148. Ported MAPREDUCE-2702 to old mapred api for aiding task + recovery. (acmurthy) + + MAPREDUCE-3133. Running a set of methods in a Single Test Class. + (Jonathan Eagles via mahadev) + + MAPREDUCE-3059. QueueMetrics do not have metrics for aggregate + containers-allocated and aggregate containers-released. + (Devaraj K via mahadev) + + MAPREDUCE-3187. Add names for various unnamed threads in MR2. + (Todd Lipcon and Siddharth Seth via mahadev) + + MAPREDUCE-3136. Added documentation for setting up Hadoop clusters in both + non-secure and secure mode for both HDFS & YARN. (acmurthy) + + MAPREDUCE-3068. Added a whitelist of environment variables for containers + from the NodeManager and set MALLOC_ARENA_MAX for all daemons and + containers. (Chris Riccomini via acmurthy) + + MAPREDUCE-3144. Augmented JobHistory with the information needed for + serving aggregated logs. (Siddharth Seth via vinodkv) + + MAPREDUCE-3163. JobClient spews errors when killing MR2 job. + (mahadev) + + MAPREDUCE-3239. Use new createSocketAddr API in MRv2 to give better + error messages on misconfig (Todd Lipcon via mahadev) + + MAPREDUCE-2747. Cleaned up LinuxContainerExecutor binary sources and changed + the configuration to use yarn names. (Robert Joseph Evans via vinodkv) + + MAPREDUCE-3205. Fix memory specifications to be physical rather than + virtual, allowing for a ratio between the two to be configurable. (todd + via acmurthy) + + MAPREDUCE-2986. Fixed MiniYARNCluster to support multiple NodeManagers. + (Anupam Seth via vinodkv) + + MAPREDUCE-2736. Remove unused contrib components dependent on MR1. (eli) + + MAPREDUCE-2989. Modified JobHistory to link to task and AM logs from the + JobHistoryServer. (Siddharth Seth via vinodkv) + + MAPREDUCE-3014. Rename and invert logic of '-cbuild' profile to 'native' and off + by default. (tucu) + + MAPREDUCE-3171. normalize nodemanager native code compilation with common/hdfs + native. (tucu) + + MAPREDUCE-3146. Added a MR specific command line to dump logs for a + given TaskAttemptID. (Siddharth Seth via vinodkv) + + MAPREDUCE-3275. Added documentation for AM WebApp Proxy. (Robert Evans via + acmurthy) + + MAPREDUCE-3322. Added a better index.html and an brief overview of YARN + architecture. (acmurthy) + + OPTIMIZATIONS + + MAPREDUCE-2026. Make JobTracker.getJobCounters() and + JobInProgress.getCounters() aquire locks in a shorter time period. + (Joydeep Sen Sarma via schen) + + MAPREDUCE-2740. MultipleOutputs in new API creates needless + TaskAttemptContexts. (todd) + + MAPREDUCE-901. Efficient framework counters. (llu via acmurthy) + + MAPREDUCE-2629. Workaround a JVM class loading quirk which prevents + JIT compilation of inner classes methods in ReduceContextImpl. + + BUG FIXES + + MAPREDUCE-2603. Disable High-Ram emulation in system tests. 
+ (Vinay Kumar Thota via amarrk) + + MAPREDUCE-2539. Fixed NPE in getMapTaskReports in JobClient. (Robert Evans via + acmurthy) + + MAPREDUCE-1978. Rumen TraceBuilder should provide recursive + input folder scanning. + + MAPREDUCE-2416. Remove the restriction of specifying group names in + users-list file for Gridmix in RoundRobinUserResolver mode. + + MAPREDUCE-2417. Fix Gridmix in RoundRobinUserResolver mode to + map testing/proxy users to unique users in a trace. + + MAPREDUCE-2307. Exception thrown in Jobtracker logs, when the Scheduler + configured is FairScheduler. (Devaraj K via matei) + + MAPREDUCE-2199. build is broken 0.22 branch creation. (cos) + + MAPREDUCE-1752. Implement getFileBlockLocations in HarFilesystem. + (Patrick Kling via dhruba) + + MAPREDUCE-2155. RaidNode should optionally use the mapreduce jobs to + fix missing blocks. (Patrick Kling via dhruba) + + MAPREDUCE-1334. Fix TestIndexUpdater by ignoring _SUCCESS file in HDFS. + (Kay Kay via yhemanth) + + MAPREDUCE-2232. Add missing methods to TestMapredGroupMappingServiceRefresh. + (Todd Lipcon via eli) + + MAPREDUCE-2271. Fix TestSetupTaskScheduling failure on trunk. + (Liyin Liang via todd) + + MAPREDUCE-2290. Fix compilation error in TestTaskCommit. (eli) + + MAPREDUCE-2294. Fix compilation error in mumak. (todd) + + MAPREDUCE-2300. Fix TestUmbilicalProtocolWithJobToken on trunk after + HADOOP-6904. (todd) + + MAPREDUCE-2296. Fix references to misspelled method name + getProtocolSigature (todd) + + MAPREDUCE-2311. Fix TestFairScheduler failure (schen) + + MAPREDUCE-1996. API: Reducer.reduce() method detail misstatement. + (Harsh J Chouraria via tomwhite) + + MAPREDUCE-2203. Wrong javadoc for TaskRunner's appendJobJarClasspaths + method. (Jingguo Yao via tomwhite) + + MAPREDUCE-2074. Task should fail when symlink creation fails. + (Priyo Mustafi via tomwhite) + + MAPREDUCE-1242. Chain APIs error misleading. + (Harsh J Chouraria via tomwhite) + + MAPREDUCE-2379. Adds missing DistributedCache configurations in + mapred-default.xml (Todd Lipcon via amareshwari) + + MAPREDUCE-2348. Disable mumak tests on trunk since they currently time out + (todd) + + MAPREDUCE-2395. TestBlockFixer timing out on trunk. (Ramkumar Vadali via + todd) + + MAPREDUCE-2426. Make TestFairSchedulerSystem fail with more verbose output + (todd) + + MAPREDUCE-2448. NoSuchMethodError: + org.apache.hadoop.hdfs.TestDatanodeBlockScanner.corruptReplica(..) (eli) + + MAPREDUCE-2460. Fix flaky test TestFairSchedulerSystem. (todd) + + MAPREDUCE-2451. Log the details from health check script at the + JobTracker. (Thomas Graves via cdouglas) + + MAPREDUCE-2467. HDFS-1052 changes break the raid contrib module in + MapReduce. (suresh srinivas via mahadev) + + MAPREDUCE-2258. IFile reader closes stream and compressor in wrong order. + (todd via tomwhite) + + MAPREDUCE-2518. The t flag is missing in distcp help message. (Wei Yongjun + via szetszwo) + + MAPREDUCE-2514. Fix typo in TaskTracker ReinitTrackerAction log message. + (Jonathan Eagles via cdouglas) + + MAPREDUCE-2490. Add logging to graylist and blacklist activity to aid + diagnosis of related issues. (Jonathan Eagles via cdouglas) + + MAPREDUCE-2495. exit() the TaskTracker when the distributed cache cleanup + thread dies. (Robert Joseph Evans via cdouglas) + + MAPREDUCE-2470. Fix NPE in RunningJobs::getCounters. (Robert Joseph Evans + via cdouglas) + + MAPREDUCE-2536. Update FsShell -mv command usage in TestMRCLI. (Daryn + Sharp via szetszwo) + + MAPREDUCE-2529. 
Add support for regex-based shuffle metric counting + exceptions. (Thomas Graves via cdouglas) + + MAPREDUCE-2559. ant binary fails due to missing c++ lib dir. (eli) + + MAPREDUCE-2573. Fix new findbugs warning introduced by MAPREDUCE-2494. + (Robert Joseph Evans via todd) + + MAPREDUCE-2581. Spelling errors in log messages. (Tim Sell via eli) + + MAPREDUCE-2588. Change raid to the new DataTransferProtocol API. (szetszwo) + + MAPREDUCE-2576. Typo in comment in SimulatorLaunchTaskAction.java. + (Tim Sell via jghoman) + + MAPREDUCE-2550. Fix bin/mapred to work properly from within a source + checkout (Eric Yang via todd) + + MAPREDUCE-2620. Update RAID for HDFS-2087. (szetszwo) + + MAPREDUCE-2624. Update RAID for HDFS-2107. (szetszwo) + + MAPREDUCE-2670. Fixing spelling mistake in FairSchedulerServlet.java. (eli) + + MAPREDUCE-2710. Update JobSubmitter.printTokens(..) for HDFS-2161. + (szetszwo) + + MAPREDUCE-2409. DistributedCache maps files and archives to the same path, + despite semantic incompatibility. (Siddharth Seth via cdouglas) + + MAPREDUCE-2575. TestMiniMRDFSCaching fails if test.build.dir is set + to something other than build/test (Thomas Graves via mahadev) + + MAPREDUCE-2622. Remove the last remaining reference to the deprecated + configuration "io.sort.mb". (Harsh J Chouraria via todd) + + MAPREDUCE-2732. Remove directly accessing FSNamesystem.LOG from + TestCopyFiles and TestDistCh. (szetszwo) + + MAPREDUCE-2463. Job history files are not moved to done folder when job + history location is hdfs. (Devaraj K via szetszwo) + + MAPREDUCE-2243. Close streams properly in a finally-block to avoid leakage + in CompletedJobStatusStore, TaskLog, EventWriter and TotalOrderPartitioner. + (Devaraj K via szetszwo) + + MAPREDUCE-2741. Make ant build system work with hadoop-common JAR + generated by Maven. (Alejandro Abdelnur via tomwhite) + + MAPREDUCE-2760. mapreduce.jobtracker.split.metainfo.maxsize typoed + in mapred-default.xml. (todd via eli) + + MAPREDUCE-2797. Update mapreduce tests and RAID for HDFS-2239. (szetszwo) + + MAPREDUCE-2805. Update RAID for HDFS-2241. (szetszwo) + + MAPREDUCE-2837. Ported bug fixes from y-merge to prepare for MAPREDUCE-279 + merge. (acmurthy) + + MAPREDUCE-2541. Fixed a race condition in IndexCache.removeMap. (Binglin + Chang via acmurthy) + + MAPREDUCE-2458. Rename sanitized pom.xml in build directory to work around IDE + bug (Luke Lu via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Clear application notification if sent once + to NodeManager (mahadev) + + MAPREDUCE-2433. YARNApplicationConstants hard code app master jar version (Luke + Lu via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Implement restart for resource manager + phase 1 - Helper classes to store and restore the data structures. (mahadev) + + MAPREDUCE-2414. Change MRv2 to use generic interfaces. (Siddharth Seth via + acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Implement health-checks for the node - + server side(ResourceManager) changes. (vinodkv) + + MAPREDUCE-2405: Implement uber-AppMaster (in-cluster LocalJobRunner for MRv2) + (Greg Roelofs via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Implementing Containers' memory monitoring. + (vinodkv) + + MAPREDUCE-2440. Name clashes in TypeConverter (luke via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Add fail count to the command line of the + application master. (mahadev) + + MAPREDUCE-2424. Polish uber-AppMaster: add uber-AM counters and GUI indicators. + (Greg Roelofs via mahadev) + + MAPREDUCE-2405.
Implement uber-AppMaster (in-cluster LocalJobRunner for MRv2). + (Greg Roelofs and Sharad Agarwal via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix YarnRemoteException to give more + details. (Siddharth Seth via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. WebApp for Job History (Krishna + Ramachandran via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Install sanitized poms for downstream + sanity (Luke Lu via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Add HistoryCleanerService to Job History + server. (Krishna Ramachandran via sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Implement 'bin/mapred job -list' and + 'bin/mapred job -list-active-trackers'. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Implement 'bin/mapred queue [-info + [-showJobs]] [-list] and enhanced 'bin/mapred job -list' to show queue and + ApplicationMaster information. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Fixed computation of user-limits at + runtime. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Added functionality to refresh queues at + runtime via the 'bin/yarn rmadmin' command. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Added functionality to stop/start queues. + (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Introducing web-UI for NodeManager and + linking it from RM UI. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fix race condition in TestJobHistoryEvents + and TestJobHistoryParsing. (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Add Containers' logs' view to NM UI and + link it from AM UI. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Add ACLs for queues and command-line + utilities for viewing them. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Recovery of MR Application Master from + failures. (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Added support High-RAM applications in + CapacityScheduler. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Completing the ZooKeeper Store for + ResourceManager state. (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Reorient container localization to be + per-container rather than per-application. (cdouglas) + + MAPREDUCE-279. Fix in MR-279 branch. Fix file creation in + JobHistoryEventHandler. (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Disable ContainerMonitoring for non-linux + systems. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fix container launch w/ inconsistent + credential file naming. (cdouglas) + + MAPREDUCE-2434. Metrics for ResourceManager. (Luke Lu via acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. RM Restart Phase 2 - Completed the recovery + of components in the RM (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix to send finish application event only + when the application is finished (mahadev) + + MAPREDUCE-2462. Write job conf along with JobHistory, other minor improvements. + (Siddharth Seth via sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Implement 'delay scheduling' for better + locality in CapacityScheduler and improved high-ram applications. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Implement Job Acls in MR Application + Master. (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Moving userlogs out of container work-dir + into a separate directory structure. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Completing RM Restart. Completed Phase 3 of + making sure events are logged and restored (mahadev) + + MAPREDUCE-2468. Add metrics for NM Shuffle. (Luke Lu via cdouglas) + + MAPREDUCE-279. Fix in MR-279 branch. 
Adding user log handling for YARN. Making + NM put the user-logs on DFS and providing log-dump tools. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fixing three tight-loops in RM that are + causing high cpu-usage. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Replacing FileContext usage with FileSystem + to work around security authentication issues with FileContext against a secure + DFS. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Client reconnect to restarted AM. (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Fix refreshProxy in ClientServiceDelegate. + (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Fix Null Pointer in TestUberAM. (sharad) + + MAPREDUCE-2478. Improve history server. (Siddharth Seth via sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Improved TestJobHistoryEvents and + TestJobHistoryParsing. (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Fix NM to use multiple disks for local + files and the userlogs. (vinodkv) + + MAPREDUCE-2480: MR App should not depend on hard coded version of shuffle (luke + lu via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Propagate error back to client in case of a + job submission failure (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix assembly to add mapreduce shell scripts + to the assembly package. (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix TestQueueMetrics. (Luke Lu via sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Removal of stale application-log dirs from + NM local disks. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Add license header and minor cleanup in + history server. (Siddharth Seth via sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Minor fix for install instructions. + (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix a race in MR task that was causing MR + containers to overwrite each other's job.xml. Also fix leaking attempt-dirs in + app-local-dir. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Adding valid state to ASM on a finish when + its already completed and also disble UberAM. (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fixed CS user limits. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Fixed reservation's bad interaction with + delay scheduling in CS. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Support mapreduce old (0.20) APIs. (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Support fail-fast for MR jobs. (ddas) + + MAPREDUCE-279. Fix in MR-279 branch. Fix for clearing container requests on an + AM failure and add tostring methods to taskids and taskattemptids for better + grep support. (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Speed up communication between MR AM and RM + by relying on a new config rather than AM_EXPIRY_INTERVAL which is too large. + (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Fix calculation of maximum capacity to use + parent's absolute-capacity rather than the leaf queue's absolute-capacity. + (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Fixing a bug in JobIDPbImpl that's causing + AM to crash randomly. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fix calculation of maximum capacity in + ParentQueue to use its parent's absolute-capacity rather than its own + absolute-capacity. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Launching bin/yarn and bin/mapred only + *once* in AM for constructing classpaths to avoid multiple forks and huge vmem + usage by AM. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. 
Fix CapacityScheduler to release unused + reservations on application completion. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Fix CapacityScheduler (LeafQueue) to not + allocate DATA_LOCAL containers when they are not required on the rack. + (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Makes uber-task disabled by default (ddas) + + MAPREDUCE-279. Fix in MR-279 branch. Make logging and memory for AM configurable + for the user via command line (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fixing a bug in previous patch (r1103657). + Now bin/yarn truly shouldn't be launched multiple times in a single AM. + (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fixing a bug to do with setting the staging + dir. (ddas) + + MAPREDUCE-279. Fix in MR-279 branch. Fixed Composite Service to shutdown + services if an error occurs on starting any one of those (mahadev & chris) + + MAPREDUCE-279. Fix in MR-279 branch. Fix the tests to use jvm fork mode to avoid + errors in shutting down services (sidharth seth) + + MAPREDUCE-2500. PB factories are not thread safe (Siddharth Seth via mahadev) + + MAPREDUCE-2504. race in JobHistoryEventHandler stop (Siddharth Seth via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix job hang if the AM launch fails. + (mahadev) + + MAPREDUCE-2509. Fix NPE in UI for pending attempts. (Luke Lu via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Add junit jar to lib in assembly (mahadev + and luke) + + MAPREDUCE-279. Fix in MR-279 branch. Distributed cache bug fix to pass Terasort. + (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fix null pointer exception in kill task + attempt (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Refactored RMContainerAllocator to release + unused containers. (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Changed Scheduler to return available limit + to AM in the allocate api. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Fix nodemanager expiry to not throw OOM. + (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Use DefaultContainerExecutor for + integration tests. (cdouglas) + + MAPREDUCE-279. Fix in MR-279 branch. Fix NPE in test case (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix for regression on the scheduling of + reduces before maps are done (ddas) + + MAPREDUCE-279. Fix in MR-279 branch. Fix distributed-cache related bugs. + (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Ensure unused containers released by AM are + correctly counted for queue-capacity. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Fix TestRuntimeEstimators (Siddharth Seth + via ddas) + + MAPREDUCE-279. Fix in MR-279 branch. Fix queue refresh to correctly record newly + added queues in CapacityScheduler. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Added metrics for tracking reservations in + CapacityScheduler. (Luke Lu via acmurthy) + + MAPREDUCE-2522. Security for JobHistory service. (Siddharth Seth via mahadev) + + MAPREDUCE-2534. Fix CI breaking hard coded version in jobclient pom. (Luke Lu + via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Add public cache. (cdouglas) + + MAPREDUCE-279. Fix in MR-279 branch. Made number of RPC server threads + configurable. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Added acl check for RMAdmin. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Adding job kill for any state that the job + is in with access control. (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Add debug statements for AM not launching + (mahadev) + + MAPREDUCE-279. 
Fix in MR-279 branch. Fixing race condition leader to hung jobs + in scheduler negotiator (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Add debug config for delaying delete of + local files. (cdouglas) + + MAPREDUCE-2527. Metrics for MRAppMaster (Luke lu via mahadev) + + MAPREDUCE-2532. Metrics for NodeManager (Luke Lu via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fixed an NPE during handling of unnecessary + reservations in CS. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Fix for public dist cache to work with non + default hdfs (mahadev &ddas) + + MAPREDUCE-279. Fix in MR-279 branch. Making streaming -file option work. Also + minor fixes for successful compilation of contrib tests. (vinodkv) + + MAPREDUCE-2536. Backporting changes to MR-279. + + MAPREDUCE-279. Fix in MR-279 branch. Bugfix for using user staging directory for + history files (Siddharth Seth via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. First fix for making basic speculative + execution work (ddas) + + MAPREDUCE-279. Fix in MR-279 branch. Fixes for TestFail/Kill (ddas) + + MAPREDUCE-279. Fix in MR-279 branch. Set correct version of avro-maven-plugin + that is available in apache maven repositories. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fixing and reneabling + TestContainerTokenSecretManager. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Cleaning up configuration constants in + mapreduce modules. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fixing NPE on speculator in MRAppMaster and + making job-history optional in tests to make test goal succeed. (vinodk and + sharadag). + + MAPREDUCE-279. Fix in MR-279 branch. Fixed NPE in CS by checking Application + state before scheduling and fixing synchronization in CS. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Making pipes work with YARN. Changed pipes + to get log-locations from an environmental variable. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Ensure 'lost' NodeManagers are dealt + appropriately, the containers are released correctly. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Adding some more logging for AM expiry logs + (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Reduce ramp up and zero maps support. + (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Allowing hdfs calls from streaming/pipes + tasks. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Added ability to decommission nodes and + completed RM administration tools to achieve parity with JobTracker. (acmurthy) + + MAPREDUCE-2551. Added JobSummaryLog. (Siddharth Seth via acmurthy) + + MAPREDUCE-2552. Fixed NPE in CompletedJob in JobHistoryServer. (Siddharth Seth + via acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Fix reduce slow start. (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Fixed TestFifoScheduler. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Fix build issue for using yarn.version + instead of hadoop-mapred.version (mahadev and giri) + + MAPREDUCE-279. Fix in MR-279 branch. Fixes in the handling of KILL events in the + SUCCEEDED state for tasks in the application master (ddas) + + MAPREDUCE-279. Fix in MR-279 branch. Fix for NPE in TestRMNMRPCResponseId. + (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix a deadlock in the resourcemanager. + (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. NodeStatus.getNodeHealthStatus().setBlah + broken (Siddharth Seth) + + MAPREDUCE-279. Fix in MR-279 branch. Fix another NPE in TestRMNMRPCResponseId. + (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. 
Fix for NPE in TestNMExpiry (siddharth + seth) + + MAPREDUCE-279. Fix in MR-279 branch. Making each node aggregate all its + user-logs to a separate hdfs file. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fix calculation of max-capacity for a + queue, also fixed a bug in registration of NodeManagers. (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. More cleaning up constants, removing stale + code, and making conspicuous the envs that apps depend on to be provided by + YARN. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fix container size rounding in AM and + headroom in RM. (acmurthy and sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Disable Job acls until fixed (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix to report job status if the application + is KILLED/FAILED. (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix a corner case in headroom computation - + now reservations are taken into account and headroom is computed much later to + account for allocations/reservations. (acmurthy) + + MAPREDUCE-2537. The RM writes its log to + yarn-mapred-resourcemanager-.out (Robert Evans via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix logging for showing the state of job + (FAILED/KILLED/SUCCEEDED) when it completes (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Re-enabled TestCapacityScheduler. + (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Support for min and max container capacity. + (acmurthy and sharad) + + MAPREDUCE-2531. Fixed jobcontrol to downgrade JobID. (Robert Evans via acmurthy) + + MAPREDUCE-2539. Fixed NPE in getMapTaskReports in JobClient. (Robert Evans via + acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Fixing the wrong config key used in + JobHistory that prevented configuring move-thread interval. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fixed inconsistency in QueueACL enums. + (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Fix various issues with Web UI's. (Luke Lu) + + MAPREDUCE-279. Fix in MR-279 branch. Fix class cast exception in Task abort for + old mapreduce apis. (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Add deletion of distributed cache + resources. (cdouglas) + + MAPREDUCE-279. Fix in MR-279 branch. Disable aggregation of logs onto DFS till + JobHistoryServer starts serving logs. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Cleanup redundant code in TaskAttemptImpl. + (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Work around broken signaling in public + cache. (cdouglas) + + MAPREDUCE-2566. YarnConfiguration should reloadConfiguration if instantiated + with a non YarnConfiguration object. (Siddharth Seth) + + MAPREDUCE-279. Fix in MR-279 branch. Fully resolve paths when launching + containers. (Siddharth Seth) + + MAPREDUCE-279. Fix in MR-279 branch. Re-enabling Uber-AM feature. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fixed deadlock during expiring NMs. + (acmurthy) + + MAPREDUCE-279. Fix in MR-279 branch. Solving NPEs during + ContainerManager#StopContainer. Also removing the unused + ContainerManager#CleanupContainer api. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Remove retries in dist cache so that NM's + do not shutdown (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix classpath construction for Task. + (vinodkv via sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Sending Job diagnostics from AM to RM and + redirect to history-url on job completion. (vinodkv and sharadag) + + MAPREDUCE-279. Fix in MR-279 branch. Added clover in pom dependency. 
(sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Intermittent TestMRApp failures on faster + Linux desktop. (Luke lu via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Including source files in release + distribution (Luke Lu via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Intermittent TestMRApp failures on faster + Linux desktop (part 2) (Luke lu via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Disable Uber AM. (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Added few job diagnostic messages. (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Add ability to includes src files in + assembly target for maven (Luke Lu via mahadev) + + MAPREDUCE-2582. Cleanup JobHistory event generation.(Siddharth Seth via sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Fix rounding off problem in reduce ramp up. + (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Fix more rounding off problems in reduce + ramp up. Also fix a bug preventing the application of the cap on reduce ramp-up. + (Sharad Agarwal via vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fix to exclude images dir into the tar + distribution (Luke Lu via gkesavan) + + MAPREDUCE-279. Fix in MR-279 branch. Changes a couple of usages of FileContext + to FileSystem in YarnRunner to handle distributed cache path resolutions on + non-default filesystems. (ddas) + + MAPREDUCE-279. Fix in MR-279 branch. Display failed/killed attempts of the task + on MR AM UI separately. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Miscellaneous UI fixes + source code + formatting for MR JobHistoryEventHandler. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fixing broken link to logs for container on + NM web UI. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fixing the bug which was causing FAILED + jobs to be displayed as COMPLETED on the RM UI. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Job level node blacklisting. (sharad) + + MAPREDUCE-279. Fix in MR-279 branch. Fix NPE in history event handling + (Siddharth Seth via mahadev) + + MAPREDUCE-2569. Ensure root queue allocated 100% capacity. (Jonathan Eagles via + cdouglas) + + MAPREDUCE-279. Fix in MR-279 branch. Fix ClassCastException in JobHistoryServer + for certain jobs. (Siddharth Seth via llu) + + MAPREDUCE-279. Fix in MR-279 branch. Changes for invoking rack resolution in the + RM and in the AM (ddas) + + MAPREDUCE-279. Fix in MR-279 branch. Fix concurrent modification exception in + the Capacity Scheduler (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix stopContainer for setsid challenged + platforms. (llu) + + MAPREDUCE-2587. Generate yarn version for UI. (Thomas Graves via lluts page to + the history server UI. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Bug fix to set correct state on containers + so as to avoid duplicate containers from RM to AM. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Hack until MAPREDUCE-2365 is fixed to make + PIG work with MRV2. (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Changes a couple of usages of FileContext + to FileSystem in TaskAttemptImpl to handle distributed cache path resolutions on + non-default filesystems. (ddas) + + MAPREDUCE-279. Fix in MR-279 branch. Fix NPE when requesting attempts for + completed jobs. (Siddharth Seth via llu) + + MAPREDUCE-279. Fix in MR-279 branch. Improve logging for AM when requesting + containers to show the right ask and release fields (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix race condition between multiple + localizers on a single node. 
(cdouglas via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix RM app start/finish time and + diagnostics. (llu) + + MAPREDUCE-279. Fix in MR-279 branch. Fix to schedule reduces irrespective of the + headroom when all maps are done so as to avoid stall in reduce-scheduling when + slow-start is disabled. (Sharad Agarwal via vinodkv). + + MAPREDUCE-279. Fix in MR-279 branch. Disabling locality-wait in + CapacityScheduler for now to prevent uber-slow scheduling for apps with no + data-locality constraints (sleep-job like). (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Fixing scheduling deadlock in AM because of + incorrect headRoom values from RM. The bug happens when AM releases containers + and RM decrements current memory usage twice for all those containers. (vinodkv) + + MAPREDUCE-2611. Fix counters, finish times etc. in job history. (Siddharth Seth + via llu) + + MAPREDUCE-279. Fix in MR-279 branch. Fix for ConcurrentModification exception + while iterating through tokens in a UGI in ContainerLauncherImpl. (ddas) + + MAPREDUCE-279. Fix in MR-279 branch. Fix for NPE in YarnChild that was causing + lots of tasks to fail. (vinodkv) + + MAPREDUCE-2615. Make killJob go through AM and fix JobSummaryLog. (Siddharth + Seth via llu) + + MAPREDUCE-279. Fix in MR-279 branch. Fix class cast exception in release + reserved containers in capacity scheduler (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix diagnostics display for more than 100 + apps in RM. (llu) + + MAPREDUCE-279. Fix in MR-279 branch. Fix some invalid transitions in the RM. + (vinodkv via ddas) + + MAPREDUCE-2618. Fix NPE in 0 map 0 reduce jobs. (Jeffrey Naisbitt via llu) + + MAPREDUCE-2625. Add version info to nodemanager info page. (Jonathan Eagles via + llu) + + MAPREDUCE-279. Fix in MR-279 branch. (1) Faster retries from AM to HistoryServer + (2) Correct diagnostics for containers. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Major ASM cleanup. Streamlining classes, + interface and events. (vinodkv) + + MAPREDUCE-279. Fix in MR-279 branch. Reinstate resolve path fixes for viewfs. + (Siddharth Seth via llu) + + MAPREDUCE-2633. Add a getCounter(Enum) method to the Counters record. (Josh + Wills via sharad) + + MAPREDUCE-2645. Updates to MRv2 INSTALL and README documentation. (Josh Wills + via vinodkv) + + MAPREDUCE-2628. Add compiled on date to NM and RM info/about page. + + MAPREDUCE-2400. Remove Cluster's dependency on JobTracker via a ServiceProvider + for the actual implementation. (tomwhite via acmurthy) + + MAPREDUCE-2663. Refactoring StateMachineFactory inner classes. (ahmed radwan via + mahadev) + + MAPREDUCE-2678. minimum-user-limit-percent no longer honored. (naisbitt via + mahadev) + + MAPREDUCE-2630. refreshQueues leads to NPEs when used w/FifoScheduler. (Josh + Wills via mahadev) + + MAPREDUCE-2644. NodeManager fails to create containers when NM_LOG_DIR is not + explicitly set in the Configuration. (Josh Wills via vinodkv) + + MAPREDUCE-2661. Fix TaskImpl to not access MapTaskImpl. (Ahmed Radwan via + sharad) + + HADOOP-6929. Backport changes to MR-279 (mahadev and owen) + + HADOOP-6929. Making Security Info abstract and not an interface (mahadev) + + MAPREDUCE-2667. mapred job -kill leaves application in RUNNING state (thomas + graves via mahadev) + + MAPREDUCE-2664. Implement JobCounters for Mtions as asynchronous. (vinodkv, + sharad and acmurthy) + + MAPREDUCE-2773. 
server.api.records.NodeHealthStatus renamed but not updated in + client NodeHealthStatus.java (Thomas Graves via mahadev) + + MAPREDUCE-2772. Fix MR-279 build after common mavenization. (Robert Joseph Evans + via llu) + + MAPREDUCE-2772. Fix MR-279 build after common mavenization, part 2. (Thomas + Graves via llu) + + MAPREDUCE-279. Fix in MR-279 branch. Harmonize slf4j versions. (llu) + + MAPREDUCE-279. Fix in MR-279 branch. Fix NPE in FifoScheduler. (mahadev) + + MAPREDUCE-2776. Fix some of the yarn findbug warnings. (Siddharth Seth via + mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix findbugs warnings in mr-client modules, + part 1 (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix findbugs warnings in mr-client modules + part 2 (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix findbugs warnings in mr-client modules + part 3 (mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix the poms to enable 0.23 snapshots for + hdfs/common from apache nightly builds (gkesavan) + + MAPREDUCE-279. Fix in MR-279 branch. Fix ivy conf to work with the hadoop common + trunk maven build changes. (Giridharan Kesavan) + + MAPREDUCE-279. Fix in MR-279 branch. Patch for findbugs warnings in Resource + Manager (Siddharth Seth via mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fix for running ant targets to use the + right set of common/test jars (gkesavan via mahadev) + + MAPREDUCE-2782. Unit tests for CapacityScheduler. (acmurthy) + + MAPREDUCE-2706. Log job submission failures. (Jeffrey Naisbitt via acmurthy) + + MAPREDUCE-2781. mr279 RM application finishtime not set (Thomas Graves via + mahadev) + + MAPREDUCE-279. Fix in MR-279 branch. Fixed CS locality wait factor. (acmurthy) + + MAPREDUCE-2808. pull MAPREDUCE-2797 into mr279 branch (Thomas Graves via + mahadev) + + MAPREDUCE-2639. Bug fixes in speculate.DataStatistics. (Josh Wills via acmurthy) + + MAPREDUCE-2839. Fixed TokenCache to get delegation tokens using both new and old + apis. (Siddharth Seth via acmurthy) + + MAPREDUCE-2727. Fix divide-by-zero error in SleepJob for sleepCount equals 0. + (Jeffrey Naisbitt via acmurthy) + + MAPREDUCE-2839. Fixed TokenCache to get delegation tokens using both new + and old apis. (Siddharth Seth via acmurthy) + + MAPREDUCE-2727. Fix divide-by-zero error in SleepJob for sleepCount equals + 0. (Jeffrey Naisbitt via acmurthy) + + MAPREDUCE-2860. Fix log4j logging in the maven test cases. (mahadev) + + MAPREDUCE-2867. Remove Unused TestApplicaitonCleanup in resourcemanager/applicationsmanager. + (mahadev) + + MAPREDUCE-2868. ant build broken in hadoop-mapreduce dir (mahadev, giri and arun via mahadev) + + MAPREDUCE-2649. Handling of finished applications in RM. (Thomas Graves + via acmurthy) + + MAPREDUCE-2838. Fix MapReduce builds to use new hadoop-common test jars. + (gkesavan via acmurthy) + + MAPREDUCE-2859. Fix eclipse plugin contrib module compilation (gkesavan) + + MAPREDUCE-2846. Fix missing synchronization in the task log management. + (omalley) + + MAPREDUCE-2807. Fix AM restart and client redirection. (sharad) + + MAPREDUCE-2877. Add missing Apache license header in some files in MR + and also add the rat plugin to the poms. (mahadev) + + MAPREDUCE-2796. Set start times for MR applications for clients to see. + (Devaraj K via acmurthy) + + MAPREDUCE-2879. Fix version for MR-279 to 0.23.0. (acmurthy) + + MAPREDUCE-2881. Fix to include log4j 1.2.16 depenency (gkesavan) + + MAPREDUCE-2885. Fix mapred-config.sh to look for hadoop-config.sh in + HADOOP_COMMON_HOME/libexec. 
(acmurthy) + + MAPREDUCE-2893. Remove duplicate entry of YarnClientProtocolProvider in + ClientProtocolProvider services file. (Liang-Chi Hsieh via acmurthy) + + MAPREDUCE-2891. Javadoc for AMRMProtocol and related records. (acmurthy) + + MAPREDUCE-2898. Javadoc for ContainerManager protocol and related records. + (acmurthy) + + MAPREDUCE-2904. Fixed bin/yarn to correctly include HDFS jars and + clean up of stale refs to pre-mavenized Hadoop Common and HDFS. + (Sharad Agarwal and Arun C. Murthy via acmurthy) + + MAPREDUCE-2737. Update the progress of jobs on client side. (Siddharth Seth + and Mahadev Konar via mahadev) + + MAPREDUCE-2886. Fix Javadoc warnings in MapReduce. (mahadev) + + MAPREDUCE-2897. Javadoc for ClientRMProtocol protocol and related records. + (acmurthy) + + MAPREDUCE-2916. Ivy build for MRv1 fails with bad organization for + common daemon. (mahadev) + + MAPREDUCE-2917. Fixed corner case in container reservation which led to + starvation and hung jobs. (acmurthy) + + MAPREDUCE-2756. Better error handling in JobControl for failed jobs. + (Robert Evans via acmurthy) + + MAPREDUCE-2716. MRReliabilityTest job fails because of missing + job-file. (Jeffrey Naisbitt via vinodkv) + + MAPREDUCE-2882. TestLineRecordReader depends on ant jars. (todd) + + MAPREDUCE-2687. Fix NodeManager to use the right version of + LocalDirAllocator.getLocalPathToWrite. (mahadev & acmurthy) + + MAPREDUCE-2800. Set final progress for tasks to ensure all task information + is correctly logged to JobHistory. (Siddharth Seth via acmurthy) + + MAPREDUCE-2938. Log application submission failure in CapacityScheduler. + (acmurthy) + + MAPREDUCE-2948. Hadoop streaming test failure, post MR-2767 (mahadev) + + MAPREDUCE-2908. Fix all findbugs warnings. (vinodkv via acmurthy) + + MAPREDUCE-2947. Fixed race condition in AuxiliaryServices. (vinodkv via + acmurthy) + + MAPREDUCE-2844. Fixed display of nodes in UI. (Ravi Teja Ch N V via + acmurthy) + + MAPREDUCE-2677. Fixed 404 for some links from HistoryServer. (Robert Evans + via acmurthy) + + MAPREDUCE-2937. Ensure reason for application failure is displayed to the + user. (mahadev via acmurthy) + + MAPREDUCE-2953. Fix a race condition on submission which caused client to + incorrectly assume application was gone by making submission synchronous + for RMAppManager. (Thomas Graves via acmurthy) + + MAPREDUCE-2963. Fix hang in TestMRJobs. (Siddharth Seth via acmurthy) + + MAPREDUCE-2954. Fixed a deadlock in NM caused due to wrong synchronization + in protocol buffer records. (Siddharth Seth via vinodkv) + + MAPREDUCE-2975. Fixed YARNRunner to use YarnConfiguration rather than + Configuration. (mahadev via acmurthy) + + MAPREDUCE-2971. ant build mapreduce fails protected access jc.displayJobList + (jobs) (Thomas Graves via mahadev) + + MAPREDUCE-2691. Finishing up the cleanup of distributed cache file resources + and related tests. (Siddharth Seth via vinodkv) + + MAPREDUCE-2749. Ensure NM registers with RM after starting all its services + correctly. (Thomas Graves via acmurthy) + + MAPREDUCE-2979. Removed the needless ClientProtocolProvider configuration + from the hadoop-mapreduce-client-core module. (Siddharth Seth via vinodkv) + + MAPREDUCE-2985. Fixed findbugs warnings in ResourceLocalizationService. + (Thomas Graves via acmurthy) + + MAPREDUCE-2874. Fix formatting of ApplicationId in web-ui. (Eric Payne via + acmurthy) + + MAPREDUCE-2995. Better handling of expired containers in MapReduce + ApplicationMaster. (vinodkv via acmurthy) + + MAPREDUCE-2995. 
Fixed race condition in ContainerLauncher. (vinodkv via + acmurthy) + + MAPREDUCE-2949. Fixed NodeManager to shut-down correctly if a service + startup fails. (Ravi Teja via vinodkv) + + MAPREDUCE-3005. Fix both FifoScheduler and CapacityScheduler to correctly + enforce locality constraints. (acmurthy) + + MAPREDUCE-3007. Fixed Yarn Mapreduce client to be able to connect to + JobHistoryServer in secure mode. (vinodkv) + + MAPREDUCE-2987. Fixed display of logged user on RM Web-UI. (Thomas Graves + via acmurthy) + + MAPREDUCE-3006. Fixed MapReduce AM to exit only after properly writing out + history file. (vinodkv) + + MAPREDUCE-2925. Fixed Yarn+MR client code to behave saner with completed + jobs. (Devaraj K via vinodkv) + + MAPREDUCE-3030. Fixed a bug in NodeId.equals() that was causing RM to + reject all NMs. (Devaraj K via vinodkv) + + MAPREDUCE-3042. Fixed default ResourceTracker address. (Chris Riccomini + via acmurthy) + + MAPREDUCE-3038. job history server not starting because conf() missing + HsController (Jeffrey Naisbitt via mahadev) + + MAPREDUCE-3004. Fix ReduceTask to not assume 'local' mode in YARN. (Hitesh + Shah via acmurthy) + + MAPREDUCE-3017. The Web UI shows FINISHED for killed/successful/failed jobs. + (mahadev) + + MAPREDUCE-3040. Fixed extra copy of Configuration in + YarnClientProtocolProvider and ensured MiniMRYarnCluster sets JobHistory + configuration for tests. (acmurthy) + + MAPREDUCE-3018. Fixed -file option for streaming. (mahadev via acmurthy) + + MAPREDUCE-3036. Fixed metrics for reserved resources in CS. (Robert Evans + via acmurthy) + + MAPREDUCE-2998. Fixed a bug in TaskAttemptImpl which caused it to fork + bin/mapred too many times. (vinodkv via acmurthy) + + MAPREDUCE-3023. Fixed clients to display queue state correctly. (Ravi + Prakash via acmurthy) + + MAPREDUCE-2970. Fixed NPEs in corner cases with different configurations + for mapreduce.framework.name. (Venu Gopala Rao via vinodkv) + + MAPREDUCE-3062. Fixed default RMAdmin address. (Chris Riccomini + via acmurthy) + + MAPREDUCE-3066. Fixed default ResourceTracker address for the NodeManager. + (Chris Riccomini via acmurthy) + + MAPREDUCE-3044. Pipes jobs stuck without making progress. (mahadev) + + MAPREDUCE-2754. Fixed MR AM stdout, stderr and syslog to redirect to + correct log-files. (Ravi Teja Ch N V via vinodkv) + + MAPREDUCE-3073. Fixed build issues in MR1. (mahadev via acmurthy) + + MAPREDUCE-2691. Increase threadpool size for launching containers in + MapReduce ApplicationMaster. (vinodkv via acmurthy) + + + MAPREDUCE-2990. Fixed display of NodeHealthStatus. (Subroto Sanyal via + acmurthy) + + MAPREDUCE-3053. Better diagnostic message for unknown methods in ProtoBuf + RPCs. (vinodkv via acmurthy) + + MAPREDUCE-2952. Fixed ResourceManager/MR-client to consume diagnostics + for AM failures in a couple of corner cases. (Arun C Murthy via vinodkv) + + MAPREDUCE-3064. 27 unit test failures with Invalid + "mapreduce.jobtracker.address" configuration value for + JobTracker: "local" (Venu Gopala Rao via mahadev) + + MAPREDUCE-3090. Fix MR AM to use ApplicationAttemptId rather than + (ApplicationId, startCount) consistently. (acmurthy) + + MAPREDUCE-2646. Fixed AMRMProtocol to return containers based on + priority. (Sharad Agarwal and Arun C Murthy via vinodkv) + + MAPREDUCE-3031. Proper handling of killed containers to prevent stuck + containers/AMs on an external kill signal. (Siddharth Seth via vinodkv) + + MAPREDUCE-2984. Better error message for displaying completed containers. 
+ (Devaraj K via acmurthy) + + MAPREDUCE-3071. app master configuration web UI link under the Job menu + opens up application menu. (thomas graves via mahadev) + + MAPREDUCE-3067. Ensure exit-code is set correctly for containers. (Hitesh + Shah via acmurthy) + + MAPREDUCE-2999. Fix YARN webapp framework to properly filter servlet + paths. (Thomas Graves via vinodkv) + + MAPREDUCE-3095. fairscheduler ivy including wrong version for hdfs. + (John George via mahadev) + + MAPREDUCE-3054. Unable to kill submitted jobs. (mahadev) + + MAPREDUCE-3021. Change base urls for RM web-ui. (Thomas Graves via + acmurthy) + + MAPREDUCE-3041. Fixed ClientRMProtocol to provide min/max resource + capabilities along-with new ApplicationId for application submission. + (Hitesh Shah via acmurthy) + + MAPREDUCE-2843. Fixed the node-table to be completely displayed and making + node entries on RM UI to be sortable. (Abhijit Suresh Shingate via vinodkv) + + MAPREDUCE-3110. Fixed TestRPC failure. (vinodkv) + + MAPREDUCE-3078. Ensure MapReduce AM reports progress correctly for + displaying on the RM Web-UI. (vinodkv via acmurthy) + + MAPREDUCE-3114. Fixed invalid ApplicationURL on RM WebUI. (Subroto Sanyal + via vinodkv) + + MAPREDUCE-2791. Added missing info on 'job -status' output. (Devaraj K via + acmurthy) + + MAPREDUCE-2996. Add uber-ness information to JobHistory. (Jonathan Eagles + via acmurthy) + + MAPREDUCE-3050. Add ability to get resource usage information for + applications and nodes. (Robert Evans via acmurthy) + + MAPREDUCE-3113. Ensure bin/yarn and bin/yarn-daemon.sh identify the root + of the install properly. (Xie Xianshan via acmurthy) + + MAPREDUCE-3137. Fix broken merge of MAPREDUCE-2179. (Hitesh Shah via + acmurthy) + + MAPREDUCE-2792. Replace usage of node ip-addresses with hostnames. + (vinodkv via acmurthy) + + MAPREDUCE-3112. Fixed recursive sourcing of HADOOP_OPTS environment + variable. (Eric Yang) + + MAPREDUCE-3056. Changed the default staging directory to not include + user.name to prevent issues with non-secure mode. (Devaraj K via vinodkv) + + MAPREDUCE-2913. Fixed TestMRJobs.testFailingMapper to assert the correct + TaskCompletionEventStatus. (Jonathan Eagles via vinodkv) + + MAPREDUCE-2794. [MR-279] Incorrect metrics value for AvailableGB per + queue per user. (John George via mahadev) + + MAPREDUCE-2783. Fixing RM web-UI to show no tracking-URL when AM + crashes. (Eric Payne via vinodkv) + + MAPREDUCE-3141. Fix the broken MRAppMaster to work over YARN in security + mode.(vinodkv) + + MAPREDUCE-2751. Modified NodeManager to stop leaving around local files + after application finishes. (Siddharth Seth via vinodkv) + + MAPREDUCE-3033. Ensure Master interface pays attention to classic v/s yarn + frameworks. (Hitesh Shah via acmurthy) + + MAPREDUCE-2802. Ensure JobHistory filenames have jobId. (Jonathan Eagles + via acmurthy) + + MAPREDUCE-2876. Use a different config for ContainerAllocationExpirer. + (Anupam Seth via acmurthy) + + MAPREDUCE-3153. Fix TestFileOutputCommitter which was broken by + MAPREDUCE-2702. (mahadev via acmurthy) + + MAPREDUCE-3123. Fix NM to quote symlink names to escape special + characters. (Hitesh Shah via acmurthy) + + MAPREDUCE-3154. Fix JobSubmitter to check for output specs before copying + job submission files to fail fast. (Abhijit Suresh Shingate via acmurthy) + + MAPREDUCE-3158. Fix test failures in MRv1 due to default framework being + set to yarn. (Hitesh Shah via acmurthy) + + MAPREDUCE-3167. 
container-executor is not being packaged with the assembly + target. (mahadev) + + MAPREDUCE-3020. Fixed TaskAttemptImpl to log the correct node-address for + a finished Reduce task. (Chackaravarthy via vinodkv) + + MAPREDUCE-2668. Fixed AuxServices to send a signal on application-finish + to all the services. (Thomas Graves via vinodkv) + + MAPREDUCE-3126. Fixed a corner case in CapacityScheduler where headroom + wasn't updated on changes to cluster size. (acmurthy) + + MAPREDUCE-3140. Fixed the invalid JobHistory URL for failed + applications. (Subroto Sanyal via vinodkv) + + MAPREDUCE-3125. Modified TaskImpl to consider only non-failed, non-killed + task-attempts for obtaining task's progress. (Hitesh Shah via vinodkv) + + MAPREDUCE-2666. Retrieve shuffle port number from JobHistory on MR AM + restart. (Jonathan Eagles via acmurthy) + + MAPREDUCE-2789. Complete schedulingInfo on CLI. (Eric Payne via acmurthy) + + MAPREDUCE-3170. Fixed job output commit for deep hierarchies. (Hitesh Shah + via acmurthy) + + MAPREDUCE-3124. Fixed location of native libs i.e. libhadoop.so for + containers. (John George via acmurthy) + + MAPREDUCE-3057. Job History Server goes of OutOfMemory with 1200 Jobs + and Heap Size set to 10 GB. (Eric Payne via mahadev) + + MAPREDUCE-2840. mr279 TestUberAM.testSleepJob test fails. (jonathan eagles + via mahadev) + + MAPREDUCE-3190. Ensure bin/yarn fails early with a clear error message + when HADOOP_COMMON_HOME or HADOOP_HDFS_HOME are not set. (todd & acmurthy + via acmurthy) + + MAPREDUCE-3189. Add link decoration back to MR2's CSS. (Todd Lipcon via + mahadev) + + MAPREDUCE-3127. Changed default value of yarn.resourcemanager.acl.enable + to true and added some more documentation. (acmurthy) + + MAPREDUCE-3032. Fixed TaskAttemptImpl so that JobHistory can have error + information about failed tasks. (Devaraj K via vinodkv) + + MAPREDUCE-3196. TestLinuxContainerExecutorWithMocks fails on Mac OSX. + (Arun Murthy via mahadev) + + MAPREDUCE-3197. TestMRClientService failing on building clean checkout of + branch 0.23 (mahadev) + + MAPREDUCE-2762. Cleanup MR staging directory on completion. (mahadev via + acmurthy) + + MAPREDUCE-3165. Ensure logging options are set correctly for MR AM and + tasks. (todd via acmurthy) + + MAPREDUCE-3203. Fix some javac warnings in MRAppMaster. (mahadev) + + MAPREDUCE-3162. Separated application-init and container-init event types + in NodeManager's Application state machine. (Todd Lipcon via vinodkv) + + MAPREDUCE-3176. Fixed ant mapreduce tests that are timing out because + of wrong framework name. (Hitesh Shah via vinodkv) + + MAPREDUCE-3181. Fixed MapReduce runtime to load yarn-default.xml and + yarn-site.xml. (acmurthy) + + MAPREDUCE-2788. Normalize resource requests in FifoScheduler + appropriately. (Ahmed Radwan via acmurthy) + + MAPREDUCE-2693. Fix NPE in job-blacklisting. (Hitesh Shah via acmurthy) + + MAPREDUCE-3208. Fix NPE task/container log appenders. (liangzhwa via + acmurthy) + + MAPREDUCE-3212. Fix usage/help message for bin/yarn. (Bhallamudi Venkata + Siva Kamesh via acmurthy) + + MAPREDUCE-3179. Ensure failed tests exit with right error code. (Jonathan + Eagles via acmurthy) + + MAPREDUCE-3188. Ensure correct shutdown in services. (todd via acmurthy) + + MAPREDUCE-3226. Fix shutdown of fetcher threads. (vinodkv via acmurthy) + + MAPREDUCE-3070. Fix NodeManager to use ephemeral ports by default. + (Devaraj K via acmurthy) + + MAPREDUCE-3242. 
Trunk compilation broken with bad interaction from + MAPREDUCE-3070 and MAPREDUCE-3239. (mahadev) + + MAPREDUCE-3058. Fixed MR YarnChild to report failure when task throws an + error and thus prevent a hanging task and job. (vinodkv) + + MAPREDUCE-3087. Fixed the mapreduce classpath to correctly include the + generated-classpath file needed for tests. (Ravi Prakash via vinodkv) + + MAPREDUCE-3233. Fixed a bug in MR Job so as to be able to restart the + application on AM crash. (Mahadev Konar via vinodkv) + + MAPREDUCE-3028. Added job-end notification support. (Ravi Prakash via + acmurthy) + + MAPREDUCE-3249. Ensure shuffle-port is correctly used duringMR AM recovery. + (vinodkv via acmurthy) + + MAPREDUCE-3252. Fix map tasks to not rewrite data an extra time when + map output fits in spill buffer. (todd) + + MAPREDUCE-3159. Ensure DefaultContainerExecutor doesn't delete application + directories during app-init. (todd via acmurthy) + + MAPREDUCE-2746. Yarn servers can't communicate with each other with + hadoop.security.authorization set to true (acmurthy via mahadev) + + MAPREDUCE-2821. Added missing fields (resourcePerMap & resourcePerReduce) + to JobSummary logs. (mahadev via acmurthy) + + MAPREDUCE-3253. Fixed ContextFactory to clone JobContext correctly. + (acmurthy) + + MAPREDUCE-3263. Fixed the MAPREDUCE-3028 commit which broke MR1. (Hitesh + Shah via acmurthy) + + MAPREDUCE-3269. Fixed log4j properties to correctly set logging options + for JobHistoryServer vis-a-vis JobSummary logs. (mahadev via acmurthy) + + MAPREDUCE-3250. When AM restarts, client keeps reconnecting to the new AM + and prints a lots of logs. (vinodkv via mahadev) + + MAPREDUCE-3254. Fixed streaming to set the job.jar by using the right + JobConf ctor. (acmurthy) + + MAPREDUCE-3264. mapreduce.job.user.name needs to be set automatically. + (acmurthy via mahadev) + + MAPREDUCE-3175. Add authorization to admin web-pages such as /stacks, /jmx + etc. (Jonathan Eagles via acmurthy) + + MAPREDUCE-3257. Added authorization checks for the protocol between + ResourceManager and ApplicationMaster. (vinodkv via acmurthy) + + MAPREDUCE-3259. Added java.library.path of NodeManager to + ContainerLocalizer in LinuxContainerExecutor. (Kihwal Lee via acmurthy) + + MAPREDUCE-3279. Fixed TestJobHistoryParsing which assumed user name to be + mapred all the time. (Siddharth Seth via acmurthy) + + MAPREDUCE-3240. Fixed NodeManager to be able to forcefully cleanup its + containers (process-trees) irrespective of whether the container succeeded, + or killed. Contributed by Hitesh Shah. + + MAPREDUCE-3281. Fixed a bug in TestLinuxContainerExecutorWithMocks. (vinodkv) + + MAPREDUCE-3228. Fixed MR AM to timeout RPCs to bad NodeManagers. (vinodkv + via acmurthy) + + MAPREDUCE-3284. Moved JobQueueClient to hadoop-mapreduce-client-core. + (acmurthy) + + MAPREDUCE-3282. bin/mapred job -list throws exception. (acmurthy via + mahadev) + + MAPREDUCE-3186. User jobs are getting hanged if the Resource manager + process goes down and comes up while job is getting executed. + (Eric Payne via mahadev) + + MAPREDUCE-3285. Tests on branch-0.23 failing (Siddharth Seth via mahadev) + + MAPREDUCE-3258. Fixed AM & JobHistory web-ui to display counters properly. + (Siddharth Seth via acmurthy) + + MAPREDUCE-3290. Fixed a NPE in ClientRMService. (acmurthy) + + MAPREDUCE-3185. RM Web UI does not sort the columns in some cases. + (Jonathan Eagles via mahadev) + + MAPREDUCE-3292. 
In secure mode job submission fails with Provider + org.apache.hadoop.mapreduce.security.token.JobTokenIndentifier$Renewer + not found. (mahadev) + + MAPREDUCE-3296. Fixed the remaining nine FindBugs warnings. (vinodkv) + + MAPREDUCE-2775. Fixed ResourceManager and NodeManager to force a + decommissioned node to shutdown. (Devaraj K via vinodkv) + + MAPREDUCE-3304. Fixed intermittent test failure due to a race in + TestRMContainerAllocator#testBlackListedNodes. (Ravi Prakash via acmurthy) + + MAPREDUCE-3306. Fixed a bug in NodeManager ApplicationImpl that was causing + NodeManager to crash. (vinodkv) + + MAPREDUCE-3295. TestAMAuthorization failing on branch 0.23. (vinodkv via mahadev) + + MAPREDUCE-3183. hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml + missing license header. (Hitesh Shah via tucu). + + MAPREDUCE-3003. Publish MR JARs to Maven snapshot repository. (tucu) + + MAPREDUCE-3199. Fixed pom files to include correct log4j configuration for + tests. (vinodkv) + + MAPREDUCE-3204. mvn site:site fails on MapReduce. (tucu) + + MAPREDUCE-3248. Fixed log4j properties. (vinodkv via acmurthy) + + MAPREDUCE-3256. Added authorization checks for the protocol between + NodeManager and ApplicationMaster. (vinodkv via acmurthy) + + MAPREDUCE-3274. Fixed a race condition in MRAppMaster that was causing a + task-scheduling deadlock. (Robert Joseph Evans via vinodkv) + + MAPREDUCE-3171 merge from trunk reverted changes from MAPREDUCE-2747 MAPREDUCE-3240. + + MAPREDUCE-3313. Fixed initialization of ClusterMetrics which was failing + TestResourceTrackerService sometimes. (Hitesh Shah via vinodkv) + + MAPREDUCE-2766. Fixed NM to set secure permissions for files and directories + in distributed-cache. (Hitesh Shah via vinodkv) + + MAPREDUCE-2696. Fixed NodeManager to cleanup logs in a thread when logs' + aggregation is not enabled. (Siddharth Seth via vinodkv) + + MAPREDUCE-3262. Fixed Container's state-machine in NodeManager to handle + a couple of events in failure states correctly. (Hitesh Shah and Siddharth + Seth via vinodkv) + + MAPREDUCE-3035. Fixed MR JobHistory to ensure rack information is present. + (chakravarthy via acmurthy) + + MAPREDUCE-3321. Disabled a few MR tests for 0.23. (Hitesh Shah via + acmurthy) + + MAPREDUCE-3220. Fixed TestCombineOutputCollector. (Devaraj K via acmurthy) + + MAPREDUCE-3103. Implement Job ACLs for MRAppMaster. + (mahadev) + + MAPREDUCE-3241. [Rumen] Fix Rumen to ignore the AMStartedEvent. (amarrk) + + MAPREDUCE-3166. [Rumen] Make Rumen use job history api instead of relying + on current history file name format. (Ravi Gummadi via amarrk) + + MAPREDUCE-3157. [Rumen] Fix TraceBuilder to handle 0.20 history file + names also. (Ravi Gummadi via amarrk) + + MAPREDUCE-3081. Fix vaidya startup script. (gkesavan via suhas). + + MAPREDUCE-2764. Fix renewal of dfs delegation tokens. (Owen via jitendra) + + MAPREDUCE-3192. Fix Javadoc warning in JobClient.java and Cluster.java. + (jitendra) + + MAPREDUCE-3237. Move LocalJobRunner to hadoop-mapreduce-client-core. + (tomwhite via acmurthy) + + MAPREDUCE-3316. Rebooted link is not working properly. + (Bhallamudi Venkata Siva Kamesh via mahadev) + + MAPREDUCE-3317. Rumen TraceBuilder is emiting null as hostname. + (Ravi Gummadi via mahadev) + + MAPREDUCE-3332. contrib/raid compile breaks due to changes in hdfs/protocol/datatransfer/ + Sender#writeBlock related to checksum handling (Hitesh Shah via mahadev) + + MAPREDUCE-3337. Added missing license headers. 
(acmurthy) + +Release 0.22.1 - Unreleased + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + MAPREDUCE-3837. Job tracker is not able to recover jobs after crash. + (Mayank Bansal via shv) + +Release 0.22.0 - 2011-11-29 + + INCOMPATIBLE CHANGES + + MAPREDUCE-1866. Removes deprecated class + org.apache.hadoop.streaming.UTF8ByteArrayUtils. (amareshwari) + + MAPREDUCE-1664. Changes the behaviour of the combination of job-acls + when they function together with queue-acls. (Ravi Gummadi via vinodkv) + + MAPREDUCE-2994. Fixed a bug in ApplicationID parsing that affects RM + UI. (Devaraj K via vinodkv) + + MAPREDUCE-1788. o.a.h.mapreduce.Job shouldn't make a copy of the JobConf. + (Arun Murthy via mahadev) + + NEW FEATURES + + MAPREDUCE-1804. Stress-test tool for HDFS introduced in HDFS-708. + (Joshua Harlow via shv) + + MAPREDUCE-220. Collect cpu and memory statistics per task. (Scott Chen via + acmurthy) + + MAPREDUCE-1970. Reed-Solomon code implementation for HDFS RAID. + (Scott Chen via dhruba) + + MAPREDUCE-2169. Integrated Reed-Solomon code with RaidNode. (Ramkumar + Vadali via schen) + + MAPREDUCE-2936. Contrib Raid compilation broken after HDFS-1620. (vinodkv) + + IMPROVEMENTS + + MAPREDUCE-2141. Add an "extra data" field to Task for use by Mesos. (matei) + + MAPREDUCE-2140. Regenerate fair scheduler design doc PDF. (matei) + + MAPREDUCE-1546. Redirect all job pages to corresponding history page + if job is not in JT memory. (Scott Chen via sharad) + + MAPREDUCE-1092. Enable assertions for unit tests. (Eli Collins via + cdouglas) + + MAPREDUCE-1680. Add a metric recording JobTracker heartbeats processed. + (Dick King via cdouglas) + + MAPREDUCE-1761. FairScheduler allows separate configuration of node + and rack locality wait time (Scott Chen via dhruba) + + MAPREDUCE-1539. authorization checks for inter-server protocol + (based on HADOOP-6600) (Boris Shkolnik via shv) + + MAPREDUCE-1798. Names the configuration keys for the Kerberos + principals better. (Boris Shkolnik via ddas) + + MAPREDUCE-1773. streaming doesn't support jobclient.output.filter. + (Amareshwari Sriramadasu via vinodkv) + + MAPREDUCE-1785. Add streaming config option for not emitting the key. + (Eli Collins via sharad) + + MAPREDUCE-572. If #link is missing from uri format of -cacheArchive + then streaming does not throw error. (Amareshwari Sriramadasu via + vinodkv) + + MAPREDUCE-1545. Add timestamps for first task type launched in job summary. + (Luke Lu via cdouglas) + + MAPREDUCE-1543. Add an audit log for authentication events. (Amar Kamat and + Luke Lu via cdouglas) + + MAPREDUCE-1762. Add ability to set values of task counters. (Scott Chen via + cdouglas) + + MAPREDUCE-1533. Reduce overhead of logging and string manipulation during + heartbeat processing. (Amar Kamat and Dick King via cdouglas) + + MAPREDUCE-1516. JobTracker issues delegation tokens only if the user's + authentication is Kerberos. (Jitendra Pandey via ddas) + + MAPREDUCE-647. Update distcp forrest documentation to reflect the changes + of HADOOP-5472, MAPREDUCE-642 and HADOOP-5620. (Rodrigo Schmidt via + szetszwo) + + MAPREDUCE-1851. Documents configuration parameters in streaming. + (amareshwari) + + MAPREDUCE-1868. Add a read and connection timeout to JobClient while + pulling tasklogs. (Krishna Ramachandran via acmurthy) + + MAPREDUCE-1778. Ensure failure to setup CompletedJobStatusStore is not + silently ignored by the JobTracker. (Krishna Ramachandran via acmurthy) + + MAPREDUCE-1850. 
Includes job submit host information (name and ip) in + jobconf and jobdetails display (Krishna Ramachandran via amareshwari) + + MAPREDUCE-1893. Slive with multiple reducers. (shv) + + MAPREDUCE-1248. Fixes redudant memory copying in StreamKeyValUtil. + (Ruibang He via amareshwari) + + MAPREDUCE-1840. Enhancements to Gridmix benchmark simulating user + diversity, queue replay, and task duration for JobTracker load testing. + Also includes compatibility with security enhancements, and scalability + improvements. (Amar Kamat, Rahul Singh, Hong Tang, and cdouglas) + + MAPREDUCE-1848. Put number of speculative, data local, rack local + tasks in JobTracker metrics. (Scott Chen via dhruba) + + MAPREDUCE-1935. Makes the Distcp to work in a secure environment. + (Boris Shkolnik via ddas) + + MAPREDUCE-1945. The MapReduce component for HADOOP-6632. + (Kan Zhang & Jitendra Pandey via ddas) + + MAPREDUCE-1936. Modify Gridmix3 to support more tunable parameters for + stress submission and sleep jobs. (Hong Tang via cdouglas) + + MAPREDUCE-1733. Makes pipes applications secure. (Jitendra Pandey via ddas) + + MAPREDUCE-1566. Adds a configuration attribute using which job clients can + specify a credentials file. The tokens from there will be passed to the job. + (Jitendra Pandey and Owen O'Malley via ddas) + + MAPREDUCE-1624. Documents the job credentials and associated details to do + with delegation tokens (on the client side). + (Jitendra Pandey and Devaraj Das via ddas) + + MAPREDUCE-1834. TestSimulatorDeterministicReplay timesout on trunk. + (Hong Tang via mahadev) + + MAPREDUCE-1993. Fixes test failure + TestTrackerDistributedCacheManagerWithLinuxTaskController. (Devaraj Das + via amareshwari) + + MAPREDUCE-1523. Making Mumak work with Capacity-Scheduler (Anirban Das + via mahadev) + + MAPREDUCE-1920. Enables completed jobstatus store by default. (Tom White + via amareshwari) + + MAPREDUCE-1881. Improve TaskTrackerInstrumentation to enable collection of + advanced metrics. (Matei Zaharia via acmurthy) + + MAPREDUCE-1548. Hadoop archives preserve times and other properties from + original files. (Rodrigo Schmidt via dhruba) + + MAPREDUCE-1517. Supports streaming job to run in the background. (Bochun Bai + via amareshwari) + + MAPREDUCE-1819. RaidNode is now smarter in submitting Raid jobs. (Ramkumar + Vadali via schen) + + MAPREDUCE-2132. A command line option in RaidShell to fix blocks using raid + + MAPREDUCE-2147. Remove redundant lines in JobInProgress ctor. + (Harsh J Chouraria via cutting) + + HADOOP-7007. Update the hudson-test-patch ant target to work with the + latest test-patch.sh script (gkesavan) + + MAPREDUCE-1818. RaidNode can specify scheduling parameters. (Ramkumar + Vadali via schen) + + MAPREDUCE-2051. Contribute a fair scheduler preemption system test. + (Todd Lipcon via tomwhite) + + MAPREDUCE-1892. RaidNode can allow layered policies more efficiently. + (Ramkumar Vadali via schen) + + MAPREDUCE-1592. Generate Eclipse's .classpath file from Ivy config. + (tomwhite via nigel) + + MAPREDUCE-2073. TestTrackerDistributedCacheManager should be up-front + about requirements on build environment. (Todd Lipcon via tomwhite) + + MAPREDUCE-2093. Herriot JT and TT clients should vend statistics. (cos) + + MAPREDUCE-2167. Faster directory traversal for raid node. (Ramkumar Vadali + via schen) + + MAPREDUCE-1931. Gridmix forrest documentation . (Ranjit Mathew via vinodkv). + + MAPREDUCE-2184. Port DistRaid.java to new mapreduce API. (Ramkumar Vadali + via schen) + + MAPREDUCE-1878. 
Add MRUnit documentation. (Aaron Kimball via tomwhite) + + MAPREDUCE-2180. Add coverage of fair scheduler servlet to system test (todd) + + MAPREDUCE-2250. Fix logging in raid code. (Ramkumar Vadali via schen) + + MAPREDUCE-2260. Remove auto-generated native build files. (rvs via eli) + + MAPREDUCE-2314. configure files that are generated as part of the released + tarball need to have executable bit set (rvs via cos) + + MAPREDUCE-1159. Limit Job name in web UI to be 80 char long. (Harsh J + Chouraria via szetszwo) + + MAPREDUCE-2337. Remove dependence of public MapReduce API on classes in + server package. (tomwhite) + + MAPREDUCE-2383. Improve documentation of DistributedCache methods (Harsh J + Chouraria via todd) + + MAPREDUCE-2222. Ivy resolve force mode should be turned off by default. + (Luke Lu via tomwhite) + + MAPREDUCE-2103. task-controller shouldn't require o-r permissions. + (todd via eli) + + MAPREDUCE-2505. Explain how to use ACLs in the fair scheduler. + (matei via eli) + + MAPREDUCE-3138. Add a utility to help applications bridge changes in + Context Objects APIs due to MAPREDUCE-954. (omalley via acmurthy) + + OPTIMIZATIONS + + MAPREDUCE-1354. Enhancements to JobTracker for better performance and + scalability. (Arun C. Murthy & Richard King via acmurthy) + + MAPREDUCE-1829. JobInProgress.findSpeculativeTask should use min() to + find the candidate instead of sort(). (Scott Chen via vinodkv) + + BUG FIXES + + MAPREDUCE-1845. FairScheduler.tasksToPreempt() can return negative number. + (Scott Chen via matei) + + MAPREDUCE-1707. TaskRunner can get NPE in getting ugi from TaskTracker. + (Vinod Kumar Vavilapalli) + + MAPREDUCE-1532. Ensures that delegation tokens is obtained as the + actual user when the proxy-user is used for submitting jobs. Also + refactors the DelegationTokenToRenew class. (ddas) + + MAPREDUCE-1558. Fixes MRAdmin to look up the principal of the + JobTracker and use that in the RefreshUserToGroupsMapping protocol and + RefreshAuthorizationPolicyProtocol. (Boris Shkolnik via ddas) + + MAPREDUCE-1662. Remove unused methods from TaskRunner. (Amareshwari + Sriramadasu via cdouglas) + + MAPREDUCE-1617. Use IPv4 stack for unit tests. (Amar Kamat and Luke Lu via + cdouglas) + + MAPREDUCE-1599. Fixes MRBench so that it reuses tokens across jobs + correctly. (Jitendra Nath Pandey via ddas) + + MAPREDUCE-1836. Refresh for proxy superuser config (mr part for HDFS-1096). + (Boris Shkolnik via shv) + + MAPREDUCE-1505. Create RPC client on job submission, not in cstr of Job + instance. (Dick King via cdouglas) + + MAPREDUCE-1813. NPE in PipeMapred.MRErrorThread. (Ravi Gummadi via vinodkv) + + MAPREDUCE-1225. Fixes DistributedCache to check if the file is fresh or not, + for the first localization also. (Zhong Wang via amareshwari) + + MAPREDUCE-1559. Fixes the token renewer to use the JobTracker's + credentials for talking to the NameNode. (ddas) + + MAPREDUCE-1492. Delete obsolete har files used on the parity files + of hdfs raid. (Rodrigo Schmidt via dhruba) + + MAPREDUCE-1857. Removes unused configuration parameters in streaming. + (amareshwari) + + MAPREDUCE-1887. MRAsyncDiskService now properly absolutizes volume root + paths. (Aaron Kimball via zshao) + + MAPREDUCE-1863. Fix NPE in Rumen when processing null CDF for failed task + attempts. (Amar Kamat via cdouglas) + + MAPREDUCE-1864. Removes uninitialized/unused variables in + org.apache.hadoop.streaming.PipeMapRed. (amareshwari) + + MAPREDUCE-1888. 
Fixes Streaming to override output key and value types, + only if mapper/reducer is a command. (Ravi Gummadi via amareshwari) + + MAPREDUCE-577. Fixes duplicate records in StreamXmlRecordReader. + (Ravi Gummadi via amareshwari) + + MAPREDUCE-1894. Fixed a bug in DistributedRaidFileSystem.readFully() + that was causing it to loop infinitely. (Ramkumar Vadali via dhruba) + + MAPREDUCE-1838. Reduce the time needed for raiding a bunch of files + by randomly assigning files to map tasks. (Ramkumar Vadali via dhruba) + + MAPREDUCE-1820. Fix InputSampler to clone sampled keys. (Alex Kozlov via + cdouglas) + + MAPREDUCE-1528. Incorporates the changes to the credentials API done in + HADOOP-6845. Also, introduces Credentials in JobConf, and in JobContext. + (Jitendra Pandey and Arun Murthy via ddas) + + MAPREDUCE-1865. Rumen should also support jobhistory files generated using + trunk. (Amar Kamat via amareshwari) + + MAPREDUCE-1621. Fixes NPE in TextOutputReader.getLastOutput if it has never + read any output. (amareshwari) + + MAPREDUCE-1911. Fixes errors in -info message in streaming. (amareshwari) + + MAPREDUCE-1772. Corrects errors in streaming documentation in forrest. + (amareshwari) + + MAPREDUCE-1925. Fix failing TestRumenJobTraces. (Ravi Gummadi via cdouglas) + + MAPREDUCE-1718. Fixes a bug in the construction of jobconf key for the + mapping that the tasks use at runtime for looking up delegation tokens. + (Boris Shkolnik via ddas) + + MAPREDUCE-1701. Fixes a problem to do with exception handling in + delegation-token renewals. (Boris Shkolnik via ddas) + + MAPREDUCE-1686. Fixes StreamUtil.goodClassOrNull to find classes without + package names. (Paul Burkhardt via amareshwari) + + MAPREDUCE-1288. Fixes TrackerDistributedCacheManager to take into account + the owner of the localized file in the mapping from cache URIs to + CacheStatus objects. (ddas) + + MAPREDUCE-1982. Fixes Rumen's TraceBuilder to extract job name from either + of configuration properties "mapreduce.job.name" and "mapred.job.name". + (Ravi Gummadi via amareshwari) + + MAPREDUCE-1958. The MapReduce part corresponding to the HADOOP-6873. + (Boris Shkolnik & Owen O'Malley via ddas) + + MAPREDUCE-1900. TaskTracker and JobTracker closes FileSystems, opened on + behalf of users that it no longer requires. (Kan Zhang and ddas via ddas) + + MAPREDUCE-1992. Fixes a problem to do with bringing up the JobTracker in + unsecure mode. (Kan Zhang via ddas) + + MAPREDUCE-1999. Fixes ClientProtocol to use the correct + DelegationTokenSelector. (Jitendra Pandey via ddas) + + MAPREDUCE-1780. AccessControlList.toString() is used for serialization of + ACL in JobStatus.java. (Ravi Gummadi via vinodkv) + + MAPREDUCE-1961. Fix ConcurrentModificationException in Gridmix during + shutdown. (Hong Tang via cdouglas) + + MAPREDUCE-2000. Fix parsing of JobHistory lines in Rumen when quotes are + escaped. (Hong Tang via cdouglas) + + MAPREDUCE-2022. Fixes compilation errors in TestSubmitJob. (amareshwari) + + MAPREDUCE-1670. RAID policies should not scan their own destination path. + (Ramkumar Vadali via dhruba) + + MAPREDUCE-1668. RaidNode Hars a directory only if all its parity files + have been created. (Ramkumar Vadali via dhruba) + + MAPREDUCE-2021. Fixes duplicate hostnames in CombineFileInputFormat's + split locations. (amareshwari) + + MAPREDUCE-1375. Fixes flaky test TestFileArgs. (Todd Lipcon via + amareshwari) + + MAPREDUCE-2023. TestDFSIO should not stop reading if curSize != bufferSize. + (Hong Tang via szetszwo) + + MAPREDUCE-2031. 
Fixes test failures TestTaskLauncher and + TestTaskTrackerLocalization. (Ravi Gummadi via amareshwari) + + MAPREDUCE-2046. Fixes CombineFileInputFormat to allow splits with size + less than DFS block size. (dhruba borthakur via amareshwari) + + MAPREDUCE-1975. Fixes unnecessary InterruptedException log in gridmix. + (Ravi Gummadi via amareshwari) + + MAPREDUCE-1597. Fixes CombineFileInputFormat to work with non-splittable + files. (amareshwari) + + MAPREDUCE-2032. Fixes TestJobCleanup to cleanup test directory in + tearDown. (Dick King via amareshwari) + + MAPREDUCE-1979. Fixes "Output directory already exists" error in gridmix + when gridmix.output.directory is not defined. (Ravi Gummadi via + amareshwari) + + MAPREDUCE-1918. Adds documentation to Rumen. (Amar Kamat via amareshwari) + + MAPREDUCE-2078. Fixes TraceBuilder to generate traces when a globbed job + history path is given. (Amar Kamat via amareshwari) + + MAPREDUCE-1989. Fixes error message in gridmix when user resolver is set + and no user list is given. (Ravi Gummadi via amareshwari) + + MAPREDUCE-2067. Distinct minicluster services (e.g. NN and JT) overwrite + each other's service policies. (Aaron T. Myers via tomwhite) + + MAPREDUCE-2029. DistributedRaidFileSystem removes itself from FileSystem + cache when it is closed. (Ramkumar Vadali via dhruba) + + MAPREDUCE-1816. HAR files used for RAID parity-bite have configurable + partfile size. (Ramkumar Vadali via dhruba) + + MAPREDUCE-2082. Fixes Pipes to create the jobtoken file in the right + place. (Jitendra Pandey via ddas) + + MAPREDUCE-2095. Fixes Gridmix to run from compressed traces. (Ranjit + Mathew via amareshwari) + + MAPREDUCE-1908. DistributedRaidFileSystem now handles ChecksumException + correctly. (Ramkumar Vadali via schen) + + MAPREDUCE-2126. JobQueueJobInProgressListener's javadoc is inconsistent + with source code. (Jingguo Yao via tomwhite) + + MAPREDUCE-2143. HarFileSystem is able to handle spaces in pathnames. + (Ramkumar Vadali via dhruba) + + MAPREDUCE-1867. Remove unused methods in + org.apache.hadoop.streaming.StreamUtil. (amareshwari via tomwhite) + + MAPREDUCE-2146. Raid does not affect access time of a source file. + (Ramkumar Vadali via dhruba) + + MAPREDUCE-2150. RaidNode periodically fixes corrupt blocks. (Ramkumar Vadali via + schen) + + MAPREDUCE-2099. RaidNode recreates outdated parity HARs. (Ramkumar Vadali + via schen) + + MAPREDUCE-2173. Fix race condition in TestBlockFixer that was + causing intermittent failure (Patrick Kling via dhruba) + + MAPREDUCE-2142. Refactor RaidNode so that the map-reduce component is + clearly separated out. (Patrick Kling via dhruba) + + MAPREDUCE-2179. Fix RaidBlockSender compilation failure. (Ramkumar Vadali + via schen) + + MAPREDUCE-2034. TestSubmitJob triggers NPE instead of permissions error. + (Todd Lipcon via tomwhite) + + MAPREDUCE-2195. New property for local conf directory in + system-test-mapreduce.xml file. (cos) + + MAPREDUCE-1783. FairScheduler initializes tasks only when the job can be + run. (Ramkumar Vadali via schen) + + MAPREDUCE-2224. Fix synchronization bugs in JvmManager. (todd) + + MAPREDUCE-714. JobConf.findContainingJar unescapes unnecessarily on linux (todd) + + MAPREDUCE-2096. Secure local filesystem IO from symlink vulnerabilities (todd) + + MAPREDUCE-2234. If Localizer can't create task log directory, it should fail + on the spot. (todd) + + MAPREDUCE-2219. JobTracker should not try to remove mapred.system.dir + during startup. (todd) + + MAPREDUCE-2207. 
Task-cleanup task should not be scheduled on the node that + the task just failed. (Liyin Liang via schen) + + MAPREDUCE-2084. Remove deprecate annotation for package file. The package + classes themselves are already deprecated. This removes an Eclipse error. + (tomwhite via nigel) + + MAPREDUCE-2248. DistributedRaidFileSystem should unraid only the corrupt + block (Ramkumar Vadali via schen) + + MAPREDUCE-1085. For tasks, "ulimit -v -1" is being run when user doesn't + specify a ulimit (todd) + + MAPREDUCE-2282. Fix TestMRServerPorts for the changes in + TestHDFSServerPorts. (shv via szetszwo) + + MAPREDUCE-2238. Fix permissions handling to avoid leaving undeletable + directories in local dirs. (todd) + + MAPREDUCE-2277. TestCapacitySchedulerWithJobTracker needs to wait for jobs + to complete before testing status. (todd) + + MAPREDUCE-2253. Servlets should specify content type (todd) + + MAPREDUCE-2283. Add timeout for Raid Tests (Ramkumar Vadali via schen) + + MAPREDUCE-1754. Replace mapred.persmissions.supergroup with an + acl : mapreduce.cluster.administrators (Amareshwari Sriramadasu via shv) + + MAPREDUCE-2256. FairScheduler fairshare preemption from multiple pools may + preempt all tasks from one pool causing that pool to go below fairshare. + (Priyo Mustafi via shv) + + MAPREDUCE-2281. MR part of HADOOP-6642. (Chris Douglas, Po Cheung via shv) + + MAPREDUCE-2200. TestUmbilicalProtocolWithJobToken is failing without Krb + evironment: needs to be conditional. (cos) + + MAPREDUCE-2077. Resolve name clash in the deprecated + o.a.h.util.MemoryCalculatorPlugin (Luke Lu via shv) + + MAPREDUCE-2188. The new API MultithreadedMapper doesn't initialize + RecordReader. (Owen O'Malley via shv) + + MAPREDUCE-1915. Fix IndexOutOfBoundsException in IndexCache. + (Priyo Mustafi via shv) + + MAPREDUCE-1974. Fix multiple preemtions of the same task in FairScheduler. + (Scott Chen via shv) + + MAPREDUCE-2304. Fix TestMRCLI to allow hostname with a hyphen (-). + (Priyo Mustafi via shv) + + MAPREDUCE-1825. jobqueue_details.jsp and FairSchedulerServelet should not + call finishedMaps and finishedReduces when job is not initialized. + (Scott Chen via shv) + + MAPREDUCE-2285. MiniMRCluster does not start after ant test-patch + (todd) + + MAPREDUCE-2315. javadoc is failing in nightly build (todd) + + MAPREDUCE-2054. Hierarchical queue implementation broke dynamic queue + addition in Dynamic Scheduler. (Thomas Sandholm via tomwhite) + + MAPREDUCE-2272. Job ACL file should not be executable + (Harsh J Chouraria via todd) + + MAPREDUCE-2241. ClusterWithLinuxTaskController should accept relative path + on the command line. (todd) + + MAPREDUCE-2251. Remove unused mapreduce.job.userhistorylocation config. + (Harsh J Chouraria via todd) + + MAPREDUCE-2284. TestLocalRunner.testMultiMaps times out (todd) + + MAPREDUCE-2336. Tool-related packages should be in the Tool javadoc group. + (tomwhite) + + MAPREDUCE-2394. JUnit output format doesn't propagate into raid contrib + build. (todd) + + MAPREDUCE-2392. TaskTracker shutdown in the tests sometimes take 60s. + (tomwhite) + + MAPREDUCE-2437. SLive uses only part* files to generating the final report. + (shv) + + MAPREDUCE-2428. start-mapred.sh script fails if HADOOP_HOME is not set. + (tomwhite via eli) + + MAPREDUCE-2445. Fix TestMiniMRWithDFSWithDistinctUsers to be a valid test. + (todd) + + MAPREDUCE-2457. Job submission should inject group.name on the JobTracker + (Alejandro Abdelnur via todd) + + MAPREDUCE-2472. 
Extra whitespace in mapred.child.java.opts breaks JVM + initialization. (Aaron T. Myers via todd) + + MAPREDUCE-2222. Ivy resolve force mode should be turned off by default. + (Luke Lu via tomwhite) + + MAPREDUCE-2486. Incorrect snapshot dependency published in .pom files + (todd) + + MAPREDUCE-2327. MapTask doesn't need to put username information in + SpillRecord. (todd via tomwhite) + + MAPREDUCE-2515. MapReduce code references some deprecated options + (Ari Rabkin via todd) + + MAPREDUCE-2487. ChainReducer uses MAPPER_BY_VALUE instead of + REDUCER_BY_VALUE. (Devaraj K via todd) + + MAPREDUCE-2185. Fix infinite loop at creating splits using + CombineFileInputFormat. (Ramkumar Vadali via schen) + + MAPREDUCE-2571. CombineFileInputFormat.getSplits throws a + java.lang.ArrayStoreException. (Bochun Bai via todd) + + MAPREDUCE-2767. Remove Linux task-controller. (Milind Bhandarkar via shv) + + MAPREDUCE-2991. queueinfo.jsp fails to show queue status for Capacity + scheduler if queue names contain special symbols. (Priyo Mustafi via shv) + + MAPREDUCE-2531. Fixed jobcontrol to downgrade JobID. (Robert Evans via + acmurthy) + + MAPREDUCE-3139. SlivePartitioner generates negative partitions. (jghoman) + +Release 0.21.1 - Unreleased + + NEW FEATURES + + MAPREDUCE-2040. Forrest Documentation for Dynamic Priority Scheduler. + (Thomas Sandholm via tomwhite) + + BUG FIXES + + MAPREDUCE-1897. trunk build broken on compile-mapred-test (cos) + + MAPREDUCE-1280. Update Eclipse plugin to the new eclipse.jdt API. + (Alex Kozlov via szetszwo) + + MAPREDUCE-1984. herriot TestCluster fails because exclusion is not there + (Balaji Rajagopalan via cos) + + MAPREDUCE-2090. Clover build doesn't generate per-test coverage. (cos) + + MAPREDUCE-2134. ant binary-system is broken in mapreduce project. (cos) + + MAPREDUCE-1905. Fixes Context.setStatus() and progress() apis. + (amareshwari) + + MAPREDUCE-1809. Ant build changes for Streaming system tests in contrib + projects. (Vinay Kumar Thota via amareshwari) + + MAPREDUCE-2223. TestMRCLI might fail on Ubuntu with default /etc/hosts + (cos) + + MAPREDUCE-2228. Remove java5 dependencies from build. (cos) + + MAPREDUCE-1929. Allow artifacts to be published to the staging Apache Nexus + Maven Repository. (tomwhite) + + MAPREDUCE-2317. Fix a NPE in HadoopArchives. (Devaraj K via szetszwo) + + MAPREDUCE-2127. mapreduce trunk builds are filing on hudson. + (Bruno Mahé via eli) + + MAPREDUCE-2779. JobSplitWriter.java can't handle large job.split file. + (Ming Ma via shv) + +Release 0.21.0 - 2010-08-13 + + INCOMPATIBLE CHANGES + + MAPREDUCE-516. Fix the starvation problem in the Capacity Scheduler + when running High RAM Jobs. (Arun Murthy via yhemanth) + + MAPREDUCE-358. Change org.apache.hadoop.examples. AggregateWordCount + and org.apache.hadoop.examples.AggregateWordHistogram to use new + mapreduce api. (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-245. Change Job and jobcontrol classes to use the List interface + rather than ArrayList in APIs. (Tom White via cdouglas) + + MAPREDUCE-766. Enhanced list-blacklisted-trackers to display reasons + for blacklisting a node. (Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-817. Add a cache for retired jobs with minimal job info and + provide a way to access history file url. (sharad) + + MAPREDUCE-711. Moved Distributed Cache from Common to Map/Reduce + project. (Vinod Kumar Vavilapalli via yhemanth) + + MAPREDUCE-895. 
Per the contract elucidated in HADOOP-6201, throw + FileNotFoundException from FileSystem::listStatus rather than returning + null. (Jakob Homan via cdouglas) + + MAPREDUCE-479. Provide full task id to map output servlet rather than the + reduce id, only. (Jiaqi Tan via cdouglas) + + MAPREDUCE-873. Simplify job recovery. Incomplete jobs are resubmitted on + jobtracker restart. Removes a public constructor in JobInProgress. (sharad) + + HADOOP-6230. Moved process tree and memory calculator related classes from + Common to Map/Reduce. (Vinod Kumar Vavilapalli via yhemanth) + + MAPREDUCE-157. Refactor job history APIs and change the history format to + JSON. (Jothi Padmanabhan via sharad) + + MAPREDUCE-849. Rename configuration properties. (Amareshwari Sriramadasu + via sharad) + + MAPREDUCE-1287. Only call the partitioner with more than one reducer. + (cdouglas) + + MAPREDUCE-1385. Use the new UserGroupInformation from HADOOP-6299. + (ddas via omalley) + + MAPREDUCE-1493. Authorization for job-history pages. (vinodkv) + + MAPREDUCE-1607. Task controller may not set permissions for a + task cleanup attempt's log directory (Amareshwari Sriramadasu via vinodkv) + + MAPREDUCE-1683. Remove JNI calls from ClusterStatus cstr. (Arun Murthy and + Luke Lu via cdouglas) + + MAPREDUCE-1855. Makes the refresh methods (for groups and proxy users) + independent of the client side configuration. (Boris Shkolnik via ddas) + + NEW FEATURES + + MAPREDUCE-1774. Large-scale Automated Framework (Sharad Agarwal, Sreekanth + Ramakrishnan, Konstantin Boudnik, at all via cos) + + MAPREDUCE-706. Support for FIFO pools in the fair scheduler. + (Matei Zaharia) + + MAPREDUCE-546. Provide sample fair scheduler config file in conf/ and use + it by default if no other config file is specified. (Matei Zaharia) + + MAPREDUCE-551. Preemption support in the Fair Scheduler. (Matei Zaharia) + + MAPREDUCE-567. Add a new example MR that always fails. (Philip Zeyliger + via tomwhite) + + MAPREDUCE-211. Provides ability to run a health check script on the + tasktracker nodes and blacklist nodes if they are unhealthy. + (Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-637. Add an example, distbbp, which able to compute the n th bit + of Pi for some large n. (szetszwo) + + MAPREDUCE-532. Provide a way to limit the number of used slots + per queue in the capacity scheduler. + (Rahul Kumar Singh via yhemanth) + + MAPREDUCE-467. Provide ability to collect statistics about total tasks + and succeeded tasks in different time windows. (sharad) + + MAPREDUCE-740. Log a job-summary at the end of a job, while allowing it + to be configured to use a custom appender if desired. (acmurthy) + + MAPREDUCE-814. Provide a way to configure completed job history files + to be on HDFS. (sharad) + + MAPREDUCE-800. MRUnit should support the new API. (Aaron Kimball via + tomwhite) + + MAPREDUCE-798. MRUnit should be able to test a succession of MapReduce + passes. (Aaron Kimball via tomwhite) + + MAPREDUCE-768. Provide an option to dump jobtracker configuration in JSON + format to standard output. (V.V.Chaitanya Krishna via yhemanth) + + MAPREDUCE-824. Add support for a hierarchy of queues in the capacity + scheduler. (Rahul Kumar Singh via yhemanth) + + MAPREDUCE-751. Add Rumen, a tool for extracting statistics from job tracker + logs and generating job traces for simulation and analysis. (Dick King via + cdouglas) + + MAPREDUCE-830. Add support for splittable compression to TextInputFormats. + (Abdul Qadeer via cdouglas) + + MAPREDUCE-861. 
Add support for hierarchical queues in the Map/Reduce + framework. (Rahul Kumar Singh via yhemanth) + + MAPREDUCE-776. Add Gridmix, a benchmark processing Rumen traces to simulate + a measured mix of jobs on a cluster. (cdouglas) + + MAPREDUCE-862. Enhance JobTracker UI to display hierarchical queues. + (V.V.Chaitanya Krishna via yhemanth) + + MAPREDUCE-777. Brand new apis to track and query jobs as a + replacement for JobClient. (Amareshwari Sriramadasu via acmurthy) + + MAPREDUCE-775. Add native and streaming support for Vertica as an input + or output format taking advantage of parallel read and write properties of + the DBMS. (Omer Trajman via ddas) + + MAPREDUCE-679. XML-based metrics as JSP servlet for JobTracker. + (Aaron Kimball via tomwhite) + + MAPREDUCE-980. Modify JobHistory to use Avro for serialization. (cutting) + + MAPREDUCE-728. Add Mumak, a Hadoop map/reduce simulator. (Arun C Murthy, + Tamas Sarlos, Anirban Dasgupta, Guanying Wang, and Hong Tang via cdouglas) + + MAPREDUCE-1383. Automates fetching of delegation tokens in File*Formats + Distributed Cache and Distcp. Also, provides a config + mapreduce.job.hdfs-servers that the jobs can populate with a comma + separated list of namenodes. The job client automatically fetches + delegation tokens from those namenodes. (Boris Shkolnik via ddas) + + MAPREDUCE-698. Per-pool task limits for the fair scheduler. + (Kevin Peterson via matei) + + MAPREDUCE-1026. Does mutual authentication of the shuffle + transfers using a shared JobTracker generated key. + (Boris Shkolnik via ddas) + + MAPREDUCE-744. Introduces the notion of a public distributed cache. + (Devaraj Das) + + MAPREDUCE-1338. Introduces the notion of token cache using which + tokens and secrets can be sent by the Job client to the JobTracker. + (Boris Shkolnik via ddas) + + HDFS-503. This patch implements an optional layer over HDFS that + implements offline erasure-coding. It can be used to reduce the + total storage requirements of HDFS. (dhruba) + + MAPREDUCE-1432. Adds hooks in the jobtracker and tasktracker + for loading the tokens in the user's ugi. This is required + for the copying of files from the hdfs. (ddas) + + MAPREDUCE-1335. Adds SASL Kerberos/Digest authentication in MapReduce. + (Kan Zhang via ddas) + + MAPREDUCE-1464. Makes a compatible change in JobTokenIdentifier to + account for HADOOP-6510. (Jitendra Nath Pandey via ddas) + + MAPREDUCE-1433. Add a delegation token for MapReduce. (omalley) + + MAPREDUCE-1307. Introduces the Job level ACLs feature. + (Vinod Kumar Vavilapalli via ddas) + + MAPREDUCE-1430. JobTracker automatically renews delegation tokens for jobs. + (Boris Shkolnik via ddas) + + MAPREDUCE-1455. Introduces job-level authorization for mapreduce servlets. + (Ravi Gummadi via vinodkv) + + IMPROVEMENTS + + MAPREDUCE-463. Makes job setup and cleanup tasks as optional. + (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-502. Allow jobtracker to be configured with zero completed jobs + in memory. (Amar Kamat via sharad) + + MAPREDUCE-416. Moves the history file to a "done" folder whenever a job + completes. (Amar Kamat via ddas) + + MAPREDUCE-646. Increase srcfilelist replication number in dictcp job. + (Ravi Gummadi via szetszwo) + + HADOOP-6106. Updated hadoop-core and test jars from hudson trunk + build #12. (Giridharan Kesavan) + + MAPREDUCE-642. A option to distcp that allows preserving the full + source path of a file in the specified destination directory. + (Rodrigo Schmidt via dhruba) + + MAPREDUCE-686. 
Move TestSpeculativeExecution.Fake* into a separate class + so that it can be used by other tests. (Jothi Padmanabhan via sharad) + + MAPREDUCE-625. Modify TestTaskLimits to improve execution time. + (Jothi Padmanabhan via sharad) + + MAPREDUCE-465. Deprecate o.a.h.mapred.lib.MultithreadedMapRunner and add + test for o.a.h.mapreduce.lib.MultithreadedMapper. + (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-701. Improves the runtime of the TestRackAwareTaskPlacement + by making it a unit test. (Jothi Padmanabhan via ddas) + + MAPREDUCE-371. Change KeyFieldBasedComparator and KeyFieldBasedPartitioner + to use new api. (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-623. Resolve javac warnings in mapreduce. (Jothi Padmanabhan + via sharad) + + MAPREDUCE-655. Change KeyValueLineRecordReader and KeyValueTextInputFormat + to use new mapreduce api. (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-632. Merge TestCustomOutputCommitter with + TestCommandLineJobSubmission. (Jothi Padmanabhan via sharad) + + MAPREDUCE-627. Improves execution time of TestTrackerBlacklistAcrossJobs. + (Jothi Padmanabhan via ddas) + + MAPREDUCE-630. Improves execution time of TestKillCompletedJob. + (Jothi Padmanabhan via ddas) + + MAPREDUCE-626. Improves the execution time of TestLostTracker. + (Jothi Padmanabhan via ddas) + + MAPREDUCE-353. Makes the shuffle read and connection timeouts + configurable. (Ravi Gummadi via ddas) + + MAPREDUCE-739. Allow relative paths to be created in archives. (Mahadev + Konar via cdouglas) + + MAPREDUCE-772. Merge HADOOP-4010 changes to LineRecordReader into mapreduce + package. (Abdul Qadeer via cdouglas) + + MAPREDUCE-785. Separate sub-test of TestReduceFetch to be included in + MR-670. (Jothi Padmanabhan via cdouglas) + + MAPREDUCE-784. Modify TestUserDefinedCounters to use LocalJobRunner + instead of MiniMR. (Jothi Padmanabhan via sharad) + + HADOOP-6160. Fix releaseaudit target to run on specific directories. + (gkesavan) + + MAPREDUCE-782. Use PureJavaCrc32 in SpillRecord. (Todd Lipcon via + szetszwo) + + MAPREDUCE-369. Change org.apache.hadoop.mapred.lib.MultipleInputs to + use new api. (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-373. Change org.apache.hadoop.mapred.lib.FieldSelectionMapReduce + to use new api. (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-628. Improves the execution time of TestJobInProgress. + (Jothi Padmanabhan via ddas) + + MAPREDUCE-793. Creates a new test that consolidates a few tests to + include in the commit-test list. (Jothi Padmanabhan via ddas) + + MAPREDUCE-797. Adds combiner support to MRUnit MapReduceDriver. + (Aaron Kimball via johan) + + MAPREDUCE-656. Change org.apache.hadoop.mapred.SequenceFile* classes + to use new mapreduce api. (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-670. Creates ant target for 10 mins patch test build. + (Jothi Padmanabhan via gkesavan) + + MAPREDUCE-375. Change org.apache.hadoop.mapred.lib.NLineInputFormat + and org.apache.hadoop.mapred.MapFileOutputFormat to use new api. + (Amareshwari Sriramadasu via ddas) + + MAPREDUCE-779. Added node health failure counts into + JobTrackerStatistics. (Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-842. Setup secure permissions for localized job files, + intermediate outputs and log files on tasktrackers. + (Vinod Kumar Vavilapalli via yhemanth) + + MAPREDUCE-478. Allow map and reduce jvm parameters, environment variables + and ulimit to be set separately. 
+ Configuration changes: + add mapred.map.child.java.opts + add mapred.reduce.child.java.opts + add mapred.map.child.env + add mapred.reduce.child.ulimit + add mapred.map.child.env + add mapred.reduce.child.ulimit + deprecated mapred.child.java.opts + deprecated mapred.child.env + deprecated mapred.child.ulimit + (acmurthy) + + MAPREDUCE-767. Remove the dependence on the CLI 2.0 snapshot. + (Amar Kamat via omalley) + + MAPREDUCE-712. Minor efficiency tweaks to RandomTextWriter. (cdouglas) + + MAPREDUCE-870. Remove the job retire thread and the associated + config parameters. (sharad) + + MAPREDUCE-874. Rename the PiEstimator example to QuasiMonteCarlo. + (szetszwo) + + MAPREDUCE-336. Allow logging level of map/reduce tasks to be configurable. + Configuration changes: + add mapred.map.child.log.level + add mapred.reduce.child.log.level + (acmurthy) + + MAPREDUCE-355. Update mapred.join package to use the new API. (Amareshwari + Sriramadasu via cdouglas) + + HADOOP-6184. Updated hadoop common and test jars to get the new API + in Configuration for dumping in JSON format from Hudson trunk build #68. + (yhemanth) + + MAPREDUCE-476. Extend DistributedCache to work locally (LocalJobRunner). + (Philip Zeyliger via tomwhite) + + MAPREDUCE-825. JobClient completion poll interval of 5s causes slow tests + in local mode. (Aaron Kimball via tomwhite) + + MAPREDUCE-910. Support counters in MRUnit. (Aaron Kimball via cdouglas) + + MAPREDUCE-788. Update gridmix2 to use the new API (Amareshwari Sriramadasu + via cdouglas) + + MAPREDUCE-875. Make DBRecordReader execute queries lazily. (Aaron Kimball + via enis) + + MAPREDUCE-318. Modularizes the shuffle code. (Jothi Padmanabhan and + Arun Murthy via ddas) + + MAPREDUCE-936. Allow a load difference for fairshare scheduler. + (Zheng Shao via dhruba) + + MAPREDUCE-370. Update MultipleOutputs to use the API, merge funcitonality + of MultipleOutputFormat. (Amareshwari Sriramadasu via cdouglas) + + MAPREDUCE-898. Changes DistributedCache to use the new API. + (Amareshwari Sriramadasu via ddas) + + MAPREDUCE-144. Includes dump of the process tree in task diagnostics when + a task is killed due to exceeding memory limits. + (Vinod Kumar Vavilapalli via yhemanth) + + MAPREDUCE-945. Modifies MRBench and TestMapRed to use ToolRunner so that + options such as queue name can be passed via command line. + (Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-963. Deprecate o.a.h.mapred.FileAlreadyExistsException and + replace it with o.a.h.fs.FileAlreadyExistsException. (Boris Shkolnik + via szetszwo) + + MAPREDUCE-960. Remove an unnecessary intermediate copy and obsolete API + from KeyValueLineRecordReader. (cdouglas) + + MAPREDUCE-930. Modify Rumen to resolve paths in the canonical way, rather + than defaulting to the local filesystem. (cdouglas) + + MAPREDUCE-944. Extend the LoadManager API of the fair-share scheduler + to support regulating tasks for a job based on resources currently in use + by that job. (dhruba) + + MAPREDUCE-973. Move FailJob and SleepJob from examples to test. (cdouglas + via omalley) + + MAPREDUCE-966. Modify Rumen to clean up interfaces and simplify integration + with other tools. (Hong Tang via cdouglas) + + MAPREDUCE-856. Setup secure permissions for distributed cache files. + (Vinod Kumar Vavilapalli via yhemanth) + + MAPREDUCE-885. More efficient SQL queries for DBInputFormat. (Aaron Kimball + via enis) + + MAPREDUCE-284. Enables ipc.client.tcpnodelay in Tasktracker's Child. + (Ravi Gummadi via sharad) + + MAPREDUCE-916. 
Split the documentation to match the project split. + (Corinne Chandel via omalley) + + MAPREDUCE-649. Validate a copy by comparing the source and destination + checksums in distcp. Also adds an intra-task retry mechanism for errors + detected during the copy. (Ravi Gummadi via cdouglas) + + MAPREDUCE-654. Add a -dryrun option to distcp printing a summary of the + file data to be copied, without actually performing the copy. (Ravi Gummadi + via cdouglas) + + MAPREDUCE-664. Display the number of files deleted by distcp when the + -delete option is specified. (Ravi Gummadi via cdouglas) + + MAPREDUCE-781. Let the name of distcp jobs be configurable. (Venkatesh S + via cdouglas) + + MAPREDUCE-975. Add an API in job client to get the history file url for + a given job id. (sharad) + + MAPREDUCE-905. Add Eclipse launch tasks for MapReduce. (Philip Zeyliger + via tomwhite) + + MAPREDUCE-277. Makes job history counters available on the job history + viewers. (Jothi Padmanabhan via ddas) + + MAPREDUCE-893. Provides an ability to refresh queue configuration + without restarting the JobTracker. + (Vinod Kumar Vavilapalli and Rahul Kumar Singh via yhemanth) + + MAPREDUCE-1011. Add build.properties to svn and git ignore. (omalley) + + MAPREDUCE-954. Change Map-Reduce context objects to be interfaces. + (acmurthy) + + MAPREDUCE-639. Change Terasort example to reflect the 2009 updates. + (omalley) + + MAPREDUCE-1063. Document gridmix benchmark. (cdouglas) + + MAPREDUCE-931. Use built-in interpolation classes for making up task + runtimes in Rumen. (Dick King via cdouglas) + + MAPREDUCE-1012. Mark Context interfaces as public evolving. (Tom White via + cdouglas) + + MAPREDUCE-971. Document use of distcp when copying to s3, managing timeouts + in particular. (Aaron Kimball via cdouglas) + + HDFS-663. DFSIO for append. (shv) + + HDFS-641. Move all of the components that depend on map/reduce to + map/reduce. (omalley) + + HADOOP-5107. Use Maven ant tasks to publish artifacts. (Giridharan Kesavan + via omalley) + + MAPREDUCE-1229. Allow customization of job submission policy in Mumak. + (Hong Tang via cdouglas) + + MAPREDUCE-1317. Reduce the memory footprint of Rumen objects by interning + host Strings. (Hong Tang via cdouglas) + + MAPREDUCE-1097. Add support for Vertica 3.5 to its contrib module. (Omer + Trajman via cdouglas) + + MAPREDUCE-1627. HadoopArchives should not uses a method in DistCp. + (szetszwo) + + MAPREDUCE-1198. Alternatively schedule different types of tasks in + fair share scheduler. (Scott Chen via matei) + + MAPREDUCE-707. Provide a jobconf property for explicitly assigning a job to + a pool in the Fair Scheduler. (Alan Heirich via matei) + + MAPREDUCE-947. Added commitJob and abortJob apis to OutputCommitter. + Enhanced FileOutputCommitter to create a _SUCCESS file for successful + jobs. (Amar Kamat & Jothi Padmanabhan via acmurthy) + + MAPREDUCE-1103. Added more metrics to Jobtracker. (sharad) + + MAPREDUCE-1048. Add occupied/reserved slot usage summary on jobtracker UI. + (Amareshwari Sriramadasu and Hemanth Yamijala via sharad) + + MAPREDUCE-1090. Modified log statement in TaskMemoryManagerThread to + include task attempt id. (yhemanth) + + MAPREDUCE-1189. Reduce ivy console output to ovservable level (cos) + + MAPREDUCE-1167. ProcfsBasedProcessTree collects rss memory information. + (Scott Chen via dhruba) + + MAPREDUCE-1231. Added a new DistCp option, -skipcrccheck, so that the CRC + check during setup can be skipped. (Jothi Padmanabhan via szetszwo) + + MAPREDUCE-1190. 
Add package documentation for BBP example. + (Tsz Wo (Nicholas) Sze via cdouglas) + + MAPREDUCE-1119. When tasks fail to report status, show tasks's stack dump + before killing. (Aaron Kimball via tomwhite) + + MAPREDUCE-1185. Redirect running job url to history url if job is already + retired. (Amareshwari Sriramadasu and Sharad Agarwal via sharad) + + MAPREDUCE-1050. Introduce a mock object testing framework. (tomwhite) + + MAPREDUCE-1084. Implementing aspects development and fault injeciton + framework for MapReduce. (Sreekanth Ramakrishnan via cos) + + MAPREDUCE-1209. Move common specific part of the test TestReflectionUtils + out of mapred into common. (Todd Lipcon via tomwhite) + + MAPREDUCE-967. TaskTracker does not need to fully unjar job jars. + (Todd Lipcon via tomwhite) + + MAPREDUCE-1083. Changes in MapReduce so that group information of users + can be refreshed in the JobTracker via command line. + (Boris Shkolnik via ddas) + + MAPREDUCE-181. Changes the job submission process to be secure. + (Devaraj Das) + + MAPREDUCE-1250. Refactors the JobToken to use Common's Token interface. + (Kan Zhang via ddas) + + MAPREDUCE-896. Enhance tasktracker to cleanup files that might have + been created by user tasks with non-writable permissions. + (Ravi Gummadi via yhemanth) + + MAPREDUCE-372. Change org.apache.hadoop.mapred.lib.ChainMapper/Reducer + to use new mapreduce api. (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-1295. Add a tool in Rumen for folding and manipulating job + traces. (Dick King via cdouglas) + + MAPREDUCE-1302. TrackerDistributedCacheManager deletes file + asynchronously, thus reducing task initialization delays. + (Zheng Shao via dhruba) + + MAPREDUCE-1218. TaskTrackers send cpu and memory usage of + node to JobTracker. (Scott Chen via dhruba) + + MAPREDUCE-847. Fix Releaseaudit warning count to zero + (Giridharan Kesavan) + + MAPREDUCE-1337. Use generics in StreamJob to improve readability of that + class. (Kay Kay via cdouglas) + + MAPREDUCE-361. Port terasort example to the new mapreduce API. (Amareshwari + Sriramadasu via cdouglas) + + MAPREDUCE-1367. LocalJobRunner should support parallel mapper execution. + (Aaron Kimball via tomwhite) + + MAPREDUCE-64. Eliminate io.sort.record.percent from MapTask configuration. + (cdouglas) + + MAPREDUCE-1440. Replace the long user name in MapReduce with the local + name. (omalley) + + MAPREDUCE-1470. Move delegation tokens from HDFS to Common so that + MapReduce can use them too. (omalley) + + MAPREDUCE-1425. Reduce memory usage by archive. (mahadev via szetszwo) + + MAPREDUCE-1441. Trim whitespace from directory lists pulled from the + configuration. (Todd Lipcon via cdouglas) + + MAPREDUCE-1309. Refactor Rumen trace generator to improve code structure + and add extensible support for log formats. (Dick King via cdouglas) + + MAPREDUCE-1503. Delegation token renewing and cancelling should provide + meaningful exceptions when there are failures instead of returning + false. (omalley) + + HADOOP-6579. Upgrade commons-codec library to 1.4. (omalley) + + MAPREDUCE-1423. Improve performance of CombineFileInputFormat when multiple + pools are configured. (Dhruba Borthakur via zshao) + + MAPREDUCE-1454. Quote user supplied strings in Tracker servlets. (cdouglas) + + MAPREDUCE-1408. Add customizable job submission policies to Gridmix. (Rahul + Singh via cdouglas) + + MAPREDUCE-1527. Better warning logged when mapred.queue.names is + overshadowed by mapred-queues.xml. (Hong Tang via acmurthy) + + MAPREDUCE-1403. 
Save the size and number of distributed cache artifacts in + the configuration. (Arun Murthy via cdouglas) + + MAPREDUCE-1482. Truncate state string and diagnostic information in + TaskStatus. (Amar Kamat via szetszwo) + + MAPREDUCE-1593. [Rumen] Improvements to random seed generation (tamas via + mahadev) + + MAPREDUCE-1460. Oracle support in DataDrivenDBInputFormat. + (Aaron Kimball via tomwhite) + + MAPREDUCE-1569. Pass configuration through mocked contexts in MRUnit. + (Chris White via cdouglas) + + MAPREDUCE-1590. Move HarFileSystem from Hadoop Common to Mapreduce tools. + (mahadev) + + MAPREDUCE-1629. Get rid of fakeBlockLocations() on HarFileSystem, since + it's not used (mahadev) + + MAPREDUCE-1489. DataDrivenDBInputFormat should not query the database + when generating only one split. (Aaron Kimball via tomwhite) + + MAPREDUCE-1514. Add documentation on replication, permissions, new options, + limitations and internals of har. (mahadev via szetszwo) + + MAPREDUCE-1428. Make block size and the size of archive created files + configurable. (mahadev via szetszwo) + + MAPREDUCE-1656. JobStory should provide queue info. (hong via mahadev) + + MAPREDUCE-1466. Record number of files processed in FileInputFormat in the + Configuration for offline analysis. (Luke Lu and Arun Murthy via cdouglas) + + MAPREDUCE-1538. TrackerDistributedCacheManager manages the + number of files. (Scott Chen via dhruba) + + MAPREDUCE-1673. Scripts to start and stop RaidNode. + (Rodrigo Schmidt via dhruba) + + MAPREDUCE-1659. RaidNode writes temp files on configured tmp directory and + add random numbers to their names to avoid conflicts + (Rodrigo Schmidt via dhruba) + + MAPREDUCE-1221. Allow admins to control physical memory limits per-task + and per-node. (Scott Chen via acmurthy) + + MAPREDUCE-1065. Update mapred tutorial to use the new API. (Aaron Kimball + via cdouglas) + + MAPREDUCE-1304. Add a task counter tracking time spent in GC. (Aaron + Kimball via cdouglas) + + MAPREDUCE-1570. Add grouping comparators to MRUnit. (Chris White via + cdouglas) + + MAPREDUCE-1650. Exclude Private elements from generated MapReduce + Javadoc. (tomwhite) + + MAPREDUCE-1625. Improve grouping of packages in Javadoc. (tomwhite) + + MAPREDUCE-1417. Forrest documentation should be updated to reflect + the changes in MAPREDUCE-744. (Ravi Gummadi via vinodkv) + + MAPREDUCE-1568. TrackerDistributedCacheManager should clean up cache + in a background thread. (Scott Chen via zshao) + + MAPREDUCE-1749. Move configuration strings out of JobContext so that it + can be made public stable. (omalley) + + MAPREDUCE-1623. Apply audience and stability notations to Hadoop + Map-Reduce. (tomwhite via acmurthy) + + MAPREDUCE-1751. Change MapReduce to depend on Hadoop 'common' artifacts + instead of 'core'. (tomwhite) + + MAPREDUCE-1535. Replace usage of FileStatus#isDir(). (Eli Collins via + tomwhite) + + MAPREDUCE-1832. Allow file sizes less than 1MB in DFSIO benchmark. (shv) + + MAPREDUCE-1404. Move Cluster-Setup and Single-Node-Setup Docs from + MapReduce to Common. (tomwhite) + + MAPREDUCE-1697. Document the behavior of -file option and deprecate it + in favour of -files option in streaming. (Amareshwari Sriramadasu + via vinodkv) + + MAPREDUCE-1033. Resolve location of scripts and configuration files after + project split. (tomwhite) + + MAPREDUCE-1018. Document changes to the memory management and scheduling + model. (Hemanth Yamijala via vinodkv) + + MAPREDUCE-1896. [Herriot] New property for multi user list. 
(Vinay Thota + via cos) + + MAPREDUCE-1812. New properties for suspend and resume process. (Vinay + Thota via cos) + + OPTIMIZATIONS + + MAPREDUCE-270. Fix the tasktracker to optionally send an out-of-band + heartbeat on task-completion for better job-latency. (acmurthy) + Configuration changes: + add mapreduce.tasktracker.outofband.heartbeat + + MAPREDUCE-1186. Modified code in distributed cache to set permissions + only on required set of localized paths. + (Amareshwari Sriramadasu via yhemanth) + + MAPREDUCE-1501. FileInputFormat supports multi-level, recursive + directory listing. (Zheng Shao via dhruba) + + MAPREDUCE-1556. upgrade to Avro 1.3.0. (cutting via tomwhite) + + MAPREDUCE-1613. Install/deploy source jars to Maven repo + (Patrick Angeles via ddas) + + MAPREDUCE-1610. Forrest documentation should be updated to reflect + the changes in MAPREDUCE-856. (Ravi Gummadi via vinodkv) + + MAPREDUCE-1853. Adds caching for TaskAttemptContext in MultipleOutputs. + (Torsten Curdt via amareshwari) + + BUG FIXES + + MAPREDUCE-878. Rename fair scheduler design doc to + fair-scheduler-design-doc.tex and add Apache license header (matei) + + HADOOP-4687. MapReduce is split from Hadoop Core. It is a subproject under + Hadoop (Owen O'Malley) + + HADOOP-6096. Fix Eclipse project and classpath files following project + split. (tomwhite) + + MAPREDUCE-419. Reconcile mapred.userlog.limit.kb defaults in configuration + and code. (Philip Zeyliger via cdouglas) + + MAPREDUCE-2. Fixes a bug in KeyFieldBasedPartitioner in handling empty + keys. (Amar Kamat via sharad) + + MAPREDUCE-130. Delete the jobconf copy from the log directory of the + JobTracker when the job is retired. (Amar Kamat via sharad) + + MAPREDUCE-657. Fix hardcoded filesystem problem in CompletedJobStatusStore. + (Amar Kamat via sharad) + + MAPREDUCE-179. Update progress in new RecordReaders. (cdouglas) + + MAPREDUCE-658. Replace NPE in distcp with a meaningful error message when + the source path does not exist. (Ravi Gummadi via cdouglas) + + MAPREDUCE-671. Update ignore list to include untracked, generated + build artifacts and config files. (cdouglas) + + MAPREDUCE-433. Use more reliable counters in TestReduceFetch. (cdouglas) + + MAPREDUCE-124. Fix a bug in failure handling of abort task of + OutputCommiter. (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-694. Fix to add jsp-api jars to capacity-scheduler classpath. + (Giridharan Kesavan) + + MAPREDUCE-702. Fix eclipse-plugin jar target (Giridharan Kesavan) + + MAPREDUCE-522. Replace TestQueueCapacities with simpler test case to + test integration between capacity scheduler and MR framework. + (Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-683. Fixes an initialization problem in the JobHistory. + The initialization of JobHistoryFilesManager is now done in the + JobHistory.init call. (Amar Kamat via ddas) + + MAPREDUCE-708. Fixes a bug to allow updating the reason for + blacklisting a node on the JobTracker UI. + (Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-709. Fixes message displayed for a blacklisted node where + the reason for blacklisting is due to the health check script + timing out. (Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-676. Existing diagnostic rules fail for MAP ONLY jobs. + (Suhas Gogate via tomwhite) + + MAPREDUCE-722. Fixes a bug with tasktracker reservations for + high memory jobs in capacity scheduler. + (Vinod Kumar Vavilapalli via yhemanth) + + HADOOP-6090. Updates gridmix script to use new mapreduce api output + format. 
(Amareshwari Sriramadasu via sharad) + + MAPREDUCE-732. Removed spurious log statements in the node + blacklisting logic. (Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-734. Fix a ConcurrentModificationException in unreserving + unused reservations for a job when it completes. + (Arun Murthy and Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-733. Fix a RuntimeException while unreserving trackers + that are blacklisted for a job. + (Arun Murthy and Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-677. Fix timeout in TestNodeRefresh. (Amar Kamat via + sharad) + + MAPREDUCE-153. Fix timeout in TestJobInProgressListener. (Amar + Kamat via sharad) + + MAPREDUCE-742. Fix output messages and java comments in the Pi related + examples. (szetszwo) + + MAPREDUCE-565. Fix partitioner to work with new API. (Owen O'Malley via + cdouglas) + + MAPREDUCE-680. Fix so MRUnit can handle reuse of Writable objects. + (Aaron Kimball via johan) + + MAPREDUCE-18. Puts some checks for cross checking whether a reduce + task gets the correct shuffle data. (Ravi Gummadi via ddas) + + MAPREDUCE-771. Fix scheduling of setup and cleanup tasks to use + free slots instead of tasks for scheduling. (yhemanth) + + MAPREDUCE-717. Fixes some corner case issues in speculative + execution heuristics. (Devaraj Das) + + MAPREDUCE-716. Make DBInputFormat work with Oracle. (Aaron Kimball + via tomwhite) + + MAPREDUCE-735. Fixes a problem in the KeyFieldHelper to do with + the end index for some inputs (Amar Kamat via ddas) + + MAPREDUCE-682. Removes reservations on tasktrackers which are + blacklisted. (Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-743. Fixes a problem to do with progress reporting + in the map phase. (Ravi Gummadi via ddas) + + MAPREDUCE-765. Eliminate the deprecated warnings introduced by H-5438. + (He Yongqiang via szetszwo) + + MAPREDUCE-383. Fix a bug in Pipes combiner due to bytes count not + getting reset after the spill. (Christian Kunz via sharad) + + MAPREDUCE-809. Fix job-summary logs to correctly record status of FAILED + and KILLED jobs. (acmurthy) + + MAPREDUCE-792. Fix unchecked warnings in DBInputFormat. (Aaron Kimball + via szetszwo) + + MAPREDUCE-760. Fix a timing issue in TestNodeRefresh. (Amar Kamat via + sharad) + + MAPREDUCE-40. Keep memory management backwards compatible for job + configuration parameters and limits. (Rahul Kumar Singh via yhemanth) + + MAPREDUCE-587. Fixes a OOM issue in TestStreamingExitStatus. + (Amar Kamat via ddas) + + MAPREDUCE-408. Fixes an assertion problem in TestKillSubProcesses + (Ravi Gummadi via ddas) + + MAPREDUCE-659. Fix gridmix2 compilation. (Giridharan Kesavan) + + MAPREDUCE-796. Fixes a ClassCastException in an exception log in + MultiThreadedMapRunner. (Amar Kamat via ddas) + + MAPREDUCE-808. Fixes a serialization problem in TypedBytes. + (Klaas Bosteels via ddas) + + MAPREDUCE-845. Fix a findbugs heap size problem in build.xml and add + a new property findbugs.heap.size. (Lee Tucker via szetszwo) + + MAPREDUCE-838. Fixes a problem in the way commit of task outputs + happens. The bug was that even if commit failed, the task would + be declared as successful. (Amareshwari Sriramadasu via ddas) + + MAPREDUCE-813. Updates Streaming and M/R tutorial documents. + (Corinne Chandel via ddas) + + MAPREDUCE-805. Fixes some deadlocks in the JobTracker due to the fact + the JobTracker lock hierarchy wasn't maintained in some JobInProgress + method calls. (Amar Kamat via ddas) + + MAPREDUCE-799. Fixes so all of the MRUnit self-tests run. 
+ (Aaron Kimball via johan) + + MAPREDUCE-848. Fixes a problem to do with TestCapacityScheduler + failing (Amar Kamat via ddas) + + MAPREDUCE-840. DBInputFormat leaves open transaction. + (Aaron Kimball via tomwhite) + + MAPREDUCE-859. Adds Avro and its dependencies required by Hadoop + common. (Ravi Gummadi via sharad) + + MAPREDUCE-867. Fix ivy conf to look for avro jar from maven repo. + (Giridharan Kesavan) + + MAPREDUCE-877. Added avro as a dependency to contrib ivy settings. + (Tsz Wo (Nicholas) Sze via yhemanth) + + MAPREDUCE-852. In build.xml, remove the Main-Class, which is incorrectly + set in tools, and rename the target "tools-jar" to "tools". (szetszwo) + + MAPREDUCE-773. Sends progress reports for compressed gzip inputs in maps. + Fixes a native direct buffer leak in LineRecordReader classes. + (Hong Tang and ddas) + + MAPREDUCE-832. Reduce number of warning messages printed when + deprecated memory variables are used. (Rahul Kumar Singh via yhemanth) + + MAPREDUCE-745. Fixes a testcase problem to do with generation of JobTracker + IDs. (Amar Kamat via ddas) + + MAPREDUCE-834. Enables memory management on tasktrackers when old + memory management parameters are used in configuration. + (Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-818. Fixes Counters#getGroup API. (Amareshwari Sriramadasu + via sharad) + + MAPREDUCE-807. Handles the AccessControlException during the deletion of + mapred.system.dir in the JobTracker. The JobTracker will bail out if it + encounters such an exception. (Amar Kamat via ddas) + + MAPREDUCE-430. Fix a bug related to task getting stuck in case of + OOM error. (Amar Kamat via ddas) + + MAPREDUCE-871. Fix ownership of Job/Task local files to have correct + group ownership according to the egid of the tasktracker. + (Vinod Kumar Vavilapalli via yhemanth) + + MAPREDUCE-911. Fix a bug in TestTaskFail related to speculative + execution. (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-687. Fix an assertion in TestMiniMRMapRedDebugScript. + (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-924. Fixes the TestPipes testcase to use Tool. + (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-903. Add Avro jar to eclipse classpath. + (Philip Zeyliger via tomwhite) + + MAPREDUCE-943. Removes a testcase in TestNodeRefresh that doesn't make + sense in the new Job recovery model. (Amar Kamat via ddas) + + MAPREDUCE-764. TypedBytesInput's readRaw() does not preserve custom type + codes. (Klaas Bosteels via tomwhite) + + HADOOP-6243. Fixes a NullPointerException in handling deprecated keys. + (Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-968. NPE in distcp encountered when placing _logs directory on + S3FileSystem. (Aaron Kimball via tomwhite) + + MAPREDUCE-826. harchive doesn't use ToolRunner / harchive returns 0 even + if the job fails with exception (koji Noguchi via mahadev) + + MAPREDUCE-839. unit test TestMiniMRChildTask fails on mac os-x (hong tang + via mahadev) + + MAPREDUCE-112. Add counters for reduce input, output records to the new API. + (Jothi Padmanabhan via cdouglas) + + MAPREDUCE-648. Fix two distcp bugs: (1) it should not launch a job if all + src paths are directories, and (2) it does not skip copying when updating + a single file. (Ravi Gummadi via szetszwo) + + MAPREDUCE-946. Fix a regression in LineRecordReader where the + maxBytesToConsume parameter is not set correctly. (cdouglas) + + MAPREDUCE-977. Missing jackson jars from Eclipse template. (tomwhite) + + MAPREDUCE-988. Fix a packaging issue in the contrib modules. 
(Hong Tang via + cdouglas) + + MAPREDUCE-971. distcp does not always remove distcp.tmp.dir. (Aaron Kimball + via tomwhite) + + MAPREDUCE-995. Fix a bug in JobHistory where tasks completing after the job + is closed cause a NPE. (Jothi Padmanabhan via cdouglas) + + MAPREDUCE-953. Fix QueueManager to dump queue configuration in JSON format. + (V.V. Chaitanya Krishna via yhemanth) + + MAPREDUCE-645. Prevent distcp from running a job when the destination is a + file, but the source is not. (Ravi Gummadi via cdouglas) + + MAPREDUCE-1002. Flushed writer in JobQueueClient so queue information is + printed correctly. (V.V. Chaitanya Krishna via yhemanth) + + MAPREDUCE-1003. Fix compilation problem in eclipse plugin when + eclipse.home is set. (Ravi Gummadi via yhemanth) + + MAPREDUCE-941. Vaidya script fails on Solaris. (Chad Metcalf + via tomwhite) + + MAPREDUCE-912. Add and standardize Apache license headers. (Chad Metcalf + via cdouglas) + + MAPREDUCE-1022. Fix compilation of vertica testcases. (Vinod Kumar + Vavilapalli via acmurthy) + + MAPREDUCE-1000. Handle corrupt history files in JobHistory.initDone(). + (Jothi Padmanabhan via sharad) + + MAPREDUCE-1028. Fixed number of slots occupied by cleanup tasks to one + irrespective of slot size for the job. + (Ravi Gummadi via yhemanth) + + MAPREDUCE-964. Fixed start and finish times of TaskStatus to be + consistent, thereby fixing inconsistencies in metering tasks. + (Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-1076. Deprecate ClusterStatus and add javadoc in ClusterMetrics. + (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-979. Fixed JobConf APIs related to memory parameters to return + values of new configuration variables when deprecated variables are + disabled. (Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-1030. Modified scheduling algorithm to return a map and reduce + task per heartbeat in the capacity scheduler. + (Rahul Kumar Singh via yhemanth) + + MAPREDUCE-1071. Use DataInputStream rather than FSDataInputStream in the + JobHistory EventReader. (Hong Tang via cdouglas) + + MAPREDUCE-986. Fix Rumen to work with truncated task lines. (Dick King via + cdouglas) + + MAPREDUCE-1029. Fix failing TestCopyFiles by restoring the unzipping of + HDFS webapps from the hdfs jar. (Aaron Kimball and Jothi Padmanabhan via + cdouglas) + + MAPREDUCE-769. Make findbugs and javac warnings to zero. + (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-1104. Initialize RecoveryManager in JobTracker cstr called by + Mumak. (Hong Tang via cdouglas) + + MAPREDUCE-1061. Add unit test validating byte specifications for gridmix + jobs. (cdouglas) + + MAPREDUCE-1077. Fix Rumen so that truncated tasks do not mark the job as + successful. (Dick King via cdouglas) + + MAPREDUCE-1041. Make TaskInProgress::taskStatuses map package-private. + (Jothi Padmanabhan via cdouglas) + + MAPREDUCE-1070. Prevent a deadlock in the fair scheduler servlet. + (Todd Lipcon via cdouglas) + + MAPREDUCE-1086. Setup Hadoop logging environment for tasks to point to + task related parameters. (Ravi Gummadi via yhemanth) + + MAPREDUCE-1105. Remove max limit configuration in capacity scheduler in + favor of max capacity percentage thus allowing the limit to go over + queue capacity. (Rahul Kumar Singh via yhemanth) + + MAPREDUCE-1016. Make the job history log format JSON. (cutting) + + MAPREDUCE-1038. Weave Mumak aspects only if related files have changed. + (Aaron Kimball via cdouglas) + + MAPREDUCE-1163. Remove unused, hard-coded paths from libhdfs. 
(Allen + Wittenauer via cdouglas) + + MAPREDUCE-962. Fix a NullPointerException while killing task process + trees. (Ravi Gummadi via yhemanth) + + MAPREDUCE-1177. Correct setup/cleanup inversion in + JobTracker::getTaskReports. (Vinod Kumar Vavilapalli via cdouglas) + + MAPREDUCE-1178. Fix ClassCastException in MultipleInputs by adding + a DelegatingRecordReader. (Amareshwari Sriramadasu and Jay Booth + via sharad) + + MAPREDUCE-1068. Fix streaming job to show proper message if file is + is not present. (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-1147. Add map output counters to new API. (Amar Kamat via + cdouglas) + + MAPREDUCE-915. The debug scripts are run as the job user. (ddas) + + MAPREDUCE-1007. Fix NPE in CapacityTaskScheduler.getJobs(). + (V.V.Chaitanya Krishna via sharad) + + MAPREDUCE-28. Refactor TestQueueManager and fix default ACLs. + (V.V.Chaitanya Krishna and Rahul K Singh via sharad) + + MAPREDUCE-1182. Fix overflow in reduce causing allocations to exceed the + configured threshold. (cdouglas) + + MAPREDUCE-1239. Fix contrib components build dependencies. + (Giridharan Kesavan and omalley) + + MAPREDUCE-787. Fix JobSubmitter to honor user given symlink path. + (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-1249. Update config default value for socket read timeout to + match code default. (Amareshwari Sriramadasu via cdouglas) + + MAPREDUCE-1161. Remove ineffective synchronization in NotificationTestCase. + (Owen O'Malley via cdouglas) + + MAPREDUCE-1244. Fix eclipse-plugin's build dependencies. (gkesavan) + + MAPREDUCE-1075. Fix JobTracker to not throw an NPE for a non-existent + queue. (V.V.Chaitanya Krishna via yhemanth) + + MAPREDUCE-754. Fix NPE in expiry thread when a TT is lost. (Amar Kamat + via sharad) + + MAPREDUCE-1074. Document Reducer mark/reset functionality. (Jothi + Padmanabhan via cdouglas) + + MAPREDUCE-1267. Fix typo in mapred-default.xml. (Todd Lipcon via cdouglas) + + MAPREDUCE-952. Remove inadvertently reintroduced Task.Counter enum. (Jothi + Padmanabhan via cdouglas) + + MAPREDUCE-1230. Fix handling of null records in VerticaInputFormat. (Omer + Trajman via cdouglas) + + MAPREDUCE-1171. Allow shuffle retries and read-error reporting to be + configurable. (Amareshwari Sriramadasu via acmurthy) + + MAPREDUCE-879. Fix broken unit test TestTaskTrackerLocalization on MacOS. + (Sreekanth Ramakrishnan via yhemanth) + + MAPREDUCE-1124. Fix imprecise byte counts in Gridmix. (cdouglas) + + MAPREDUCE-1222. Add an option to exclude numeric IP addresses in topologies + processed by Mumak. (Hong Tang via cdouglas) + + MAPREDUCE-1284. Fix fts_open() call in task-controller that was failing + LinuxTaskController unit tests. (Ravi Gummadi via yhemanth) + + MAPREDUCE-1143. Fix running task counters to be updated correctly + when speculative attempts are running for a TIP. + (Rahul Kumar Singh via yhemanth) + + MAPREDUCE-1241. Use a default queue configuration in JobTracker when + mapred-queues.xml is unavailable. (Todd Lipcon via cdouglas) + + MAPREDUCE-1301. Fix set up of permission checking script used in + localization tests. (Amareshwari Sriramadasu via yhemanth) + + MAPREDUCE-1286. Remove quoting from client opts in TaskRunner. (Yuri + Pradkin via cdouglas) + + MAPREDUCE-1059. Use distcp.bytes.per.map when adding sync markers in + distcp. (Aaron Kimball via cdouglas) + + MAPREDUCE-1009. Update forrest documentation describing hierarchical + queues. (Vinod Kumar Vavilapalli via yhemanth) + + MAPREDUCE-1342. 
Fixed deadlock in global blacklisting of tasktrackers. + (Amareshwari Sriramadasu via acmurthy) + + MAPREDUCE-1316. Fixes a memory leak of TaskInProgress instances in + the jobtracker. (Amar Kamat via yhemanth) + + MAPREDUCE-1359. TypedBytes TestIO doesn't mkdir its test dir first. + (Anatoli Fomenko via cos) + + MAPREDUCE-1314. Correct errant mapreduce.x.mapreduce.x replacements from + bulk change. (Amareshwari Sriramadasu via cdouglas) + + MAPREDUCE-1365. Restore accidentally renamed test in + TestTaskTrackerBloacklisting. (Amareshwari Sriramadasu via cdouglas) + + MAPREDUCE-1406. Fix spelling of JobContext.MAP_COMBINE_MIN_SPILLS. + (cdouglas) + + MAPREDUCE-1369. JUnit tests should never depend on anything in conf + (Anatoli Fomenko via cos) + + MAPREDUCE-1412. Fix timer granularity issue causing failures in + TestTaskTrackerBlacklisting. (cdouglas) + + MAPREDUCE-1448. Respect --config option in Mumak script. (Hong Tang via + cdouglas) + + MAPREDUCE-1251. c++ utils doesn't compile. (Eli Collins via tomwhite) + + MAPREDUCE-1522. FileInputFormat may use the default FileSystem for the + input path. (Tsz Wo (Nicholas), SZE via cdouglas) + + MAPREDUCE-1407. Update javadoc in mapreduce.{Mapper,Reducer} to match + actual usage. (Benoit Sigoure via cdouglas) + + MAPREDUCE-1258. Fix fair scheduler event log not logging job info. + (matei) + + MAPREDUCE-1089. Fix NPE in fair scheduler preemption when tasks are + scheduled but not running. (Todd Lipcon via matei) + + MAPREDUCE-1014. Fix the libraries for common and hdfs. (omalley) + + MAPREDUCE-1111. JT Jetty UI not working if we run mumak.sh + off packaged distribution directory. (hong tang via mahadev) + + MAPREDUCE-1133. Eclipse .classpath template has outdated jar files and is + missing some new ones. (cos) + + MAPREDUCE-1098. Fixed the distributed-cache to not do i/o while holding a + global lock. (Amareshwari Sriramadasu via acmurthy) + + MAPREDUCE-1158. Fix JT running maps and running reduces metrics. + (sharad) + + MAPREDUCE-1160. Reduce verbosity of log lines in some Map/Reduce classes + to avoid filling up jobtracker logs on a busy cluster. + (Ravi Gummadi and Hong Tang via yhemanth) + + MAPREDUCE-1153. Fix tasktracker metrics when trackers are decommissioned. + (sharad) + + MAPREDUCE-1128. Fix MRUnit to prohibit iterating over values twice. (Aaron + Kimball via cdouglas) + + MAPREDUCE-665. Move libhdfs to HDFS subproject. (Eli Collins via dhruba) + + MAPREDUCE-1196. Fix FileOutputCommitter to use the deprecated cleanupJob + api correctly. (acmurthy) + + MAPREDUCE-1244. Fix eclipse-plugin's build dependencies. (gkesavan) + + MAPREDUCE-1140. Fix DistributedCache to not decrement reference counts for + unreferenced files in error conditions. + (Amareshwari Sriramadasu via yhemanth) + + MAPREDUCE-1245. Fix TestFairScheduler failures by instantiating lightweight + Jobtracker. (sharad) + + MAPREDUCE-1260. Update Eclipse configuration to match changes to Ivy + configuration. (Edwin Chan via cos) + + MAPREDUCE-1152. Distinguish between failed and killed tasks in + JobTrackerInstrumentation. (Sharad Agarwal via cdouglas) + + MAPREDUCE-1285. In DistCp.deleteNonexisting(..), get class from the + parameter instead of using FileStatus.class. (Peter Romianowski via + szetszwo) + + MAPREDUCE-1294. Build fails to pull latest hadoop-core-* artifacts (cos) + + MAPREDUCE-1213. TaskTrackers restart is faster because it deletes + distributed cache directory asynchronously. (Zheng Shao via dhruba) + + MAPREDUCE-1265. 
The task attempt error log prints the name of the + tasktracker machine. (Scott Chen via dhruba) + + MAPREDUCE-1201. ProcfsBasedProcessTree collects CPU usage information. + (Scott Chen via dhruba) + + MAPREDUCE-1326. fi tests don't use fi-site.xml (cos) + + MAPREDUCE-1165. Replace non-portable function name with C99 equivalent. + (Allen Wittenauer via cdouglas) + + MAPREDUCE-1331. Fixes a typo in a testcase (Devaraj Das) + + MAPREDUCE-1293. AutoInputFormat doesn't work with non-default FileSystems. + (Andrew Hitchcock via tomwhite) + + MAPREDUCE-1131. Using profilers other than hprof can cause JobClient to + report job failure. (Aaron Kimball via tomwhite) + + MAPREDUCE-1155. Streaming tests swallow exceptions. + (Todd Lipcon via tomwhite) + + MAPREDUCE-1212. Mapreduce contrib project ivy dependencies are not included + in binary target. (Aaron Kimball via tomwhite) + + MAPREDUCE-1388. Move the HDFS RAID package from HDFS to MAPREDUCE. + (Eli Collins via dhruba) + + MAPREDUCE-1322. Defines default value for staging directory to be user + based fixing a failing streaming test. + (Devaraj Das and Amareshwari Sriramadasu via yhemanth) + + MAPREDUCE-899. Modified LinuxTaskController to check that task-controller + has right permissions and ownership before performing any actions. + (Amareshwari Sriramadasu via yhemanth) + + MAPREDUCE-1443. DBInputFormat can leak connections. + (Aaron Kimball via tomwhite) + + MAPREDUCE-1457. Fixes JobTracker to get the FileSystem object within + getStagingAreaDir within a privileged block. Fixes Child.java to use the + appropriate UGIs while getting the TaskUmbilicalProtocol proxy and + while executing the task. (Jakob Homan via ddas) + + MAPREDUCE-1399. The archive command shows a null error message (nicholas + via mahadev) + + MAPREDUCE-1305. Improve efficiency of distcp -delete. (Peter Romianowski + via cdouglas) + + MAPREDUCE-1474. Update forrest documentation for Hadoop Archives. (Mahadev + Konar via cdouglas) + + MAPREDUCE-1400. Use tr rather than sed to effect literal substitution in + the build script. (Allen Wittenauer via cdouglas) + + MAPREDUCE-1358. Avoid false positives in OutputLogFilter. (Todd Lipcon via + cdouglas) + + MAPREDUCE-1490. Fix a NullPointerException that could occur during + instantiation and initialization of the DistributedRaidFileSystem. + (Rodrigo Schmidt via dhruba) + + MAPREDUCE-1476. Fix the M/R framework to not call commit for special + tasks like job setup/cleanup and task cleanup. + (Amareshwari Sriramadasu via yhemanth) + + MAPREDUCE-1398. Fix TaskLauncher to stop waiting for slots on a TIP that + is killed / failed. + (Amareshwari Sriramadasu via yhemanth) + + MAPREDUCE-1491. The parity files created by the RAID are combined + using Hadoop Archive Files (HAR). (Rodrigo Schmidt via dhruba) + + MAPREDUCE-1378. URL encode link in jobhistory.jsp to avoid errors caused by + unescaped characters. (E. Sammer via cdouglas) + + MAPREDUCE-1519. RaidNode fails to create new parity file + if an older version already exists. (Rodrigo Schmidt via dhruba) + + MAPREDUCE-1537. Fixes a compilation problem in a testcase after commit + HDFS-984. (Jitendra Nath Pandey via ddas) + + MAPREDUCE-1537. The patch makes the job client call the getDelegationToken + only when security is enabled. (Jitendra Nath Pandey via ddas) + + MAPREDUCE-1510. RAID should regenerate parity files if they get deleted. + (Rodrigo Schmidt via dhruba) + + MAPREDUCE-1421. Fix the LinuxTaskController tests failing on trunk after + the commit of MAPREDUCE-1385. 
(Amareshwari Sriramadasu via vinodkv) + + MAPREDUCE-1520. Fix TestMiniMRLocalFS failure caused by regression in + getting user working dir. (Amareshwari Sriramadasu via cdouglas) + + MAPREDUCE-1512. RAID uses HarFileSystem directly instead of + FileSystem.get (Rodrigo Schmidt via dhruba) + + MAPREDUCE-1435. Fix symlink handling in task work directory when + cleaning up, essentially to avoid following links. + (Ravi Gummadi via yhemanth) + + MAPREDUCE-1518. RaidNode does not run the deletion check on the + directory that stores the parity files. (Rodrigo Schmidt via dhruba) + + MAPREDUCE-1573. TestStreamingAsDifferentUser fails if run as tt_user. + (Ravi Gummadi via vinodkv) + + MAPREDUCE-927. Cleanup of task-logs should happen in TaskTracker instead + of the Child. (Amareshwari Sriramadasu via vinodkv) + + MAPREDUCE-1578. Decouple HadoopArchives vesrion from HarFileSystem version. + (Rodrigo Schmidt via szetszwo) + + MAPREDUCE-1422. Fix cleanup of localized job directory to work if files + with non-deletable permissions are created within it. + (Amar Kamat via yhemanth) + + MAPREDUCE-1306. Randomize the arrival of heartbeat responses in Mumak. + (Tamas Sarlos via cdouglas) + + MAPREDUCE-1579. archive: check and possibly replace the space charater + in source paths. (szetszwo) + + MAPREDUCE-1536. DataDrivenDBInputFormat does not split date columns correctly. + (Aaron Kimball via enis) + + MAPREDUCE-890. After HADOOP-4491, the user who started mapred system is + not able to run job. (Ravi Gummadi via vinodkv) + + MAPREDUCE-1615. Fix compilation of TestSubmitJob. (cdouglas) + + MAPREDUCE-1508. Protect against NPE in TestMultipleLevelCaching. (Aaron + Kimball via cdouglas) + + MAPREDUCE-1497. Suppress spurious findbugs warning about IndexCache + synchronization. (Amareshwari Sriramadasu via cdouglas) + + MAPREDUCE-1420. Fix TestTTResourceReporting failure. (Scott Chen via + cdouglas) + + MAPREDUCE-1480. Correctly initialize child RecordReaders in + CombineFileInputFormat. (Aaron Kimball via cdouglas) + + MAPREDUCE-1348. Fix block forensics packaging. (Tom White via cdouglas) + + MAPREDUCE-1628. HarFileSystem shows incorrect replication numbers and + permissions. (szetszwo via mahadev) + + MAPREDUCE-1602. Fix the error message for the case that src does not + exist. (szetszwo) + + MAPREDUCE-1585. Create Hadoop Archives version 2 with filenames + URL-encoded (rodrigo via mahadev) + + MAPREDUCE-1523. Sometimes rumen trace generator fails to extract the job + finish time. (dick king via mahadev) + + MAPREDUCE-1635. ResourceEstimator does not work after MAPREDUCE-842. + (Amareshwari Sriramadasu via vinodkv) + + MAPREDUCE-889. binary communication formats added to Streaming by + HADOOP-1722 should be documented. (Klaas Bosteels via tomwhite) + + MAPREDUCE-1031. ant tar target doens't seem to compile tests in contrib + projects. (Aaron Kimball via tomwhite) + + MAPREDUCE-1692. Removed unused testcase TestStreamedMerge. + (Sreekanth Ramakrishnan and Amareshwari Sriramadasu via yhemanth) + + MAPREDUCE-1062. Fix ReliabilityTest to work with retired jobs. (Sreekanth + Ramakrishnan via cdouglas) + + MAPREDUCE-1409. IOExceptions thrown from FIleOutputCommitter::abortTask + should cause the task to fail. (Amareshwari Sriramadasu via cdouglas) + + MAPREDUCE-1695. Include capacity scheduler in findbugs and javadoc-dev + targets and also fix existing warnings. (Hong Tang via yhemanth) + + MAPREDUCE-1494. Ensure TestJobDirCleanup verifies the correct paths. 
+ (Amareshwari Sriramadasu via cdouglas) + + MAPREDUCE-1622. Include missing slf4j dependencies. (cdouglas) + + MAPREDUCE-1515. Accept java5.home from build.properties, not only from the + command line when building forrest docs. (Al Thompson via cdouglas) + + MAPREDUCE-1618. Add missing javadoc to JobStatus::*JobAcls. (Amareshwari + Sriramadasu via cdouglas) + + MAPREDUCE-1219. Remove job level metrics from jobtracker metrics to ease + undue load on jobtracker. (Sreekanth Ramakrishnan via sharad) + + MAPREDUCE-1604. Add Forrest documentation for Job ACLs. + (Amareshwari Sriramadasu via yhemanth) + + MAPREDUCE-1705. Archiving and Purging of HDFS parity files should + handle globbed policies accurately. (Rodrigo Schmidt via dhruba) + + MAPREDUCE-1612. job conf file is not accessible from job history web page. + (Ravi Gummadi and Sreekanth Ramakrishnan via vinodkv) + + MAPREDUCE-1397. NullPointerException observed during task failures. + (Amareshwari Sriramadasu via vinodkv) + + MAPREDUCE-1728. Oracle timezone strings do not match Java. + (Aaron Kimball via tomwhite) + + MAPREDUCE-1609. TaskTracker.localizeJob should not set permissions on + job log directory recursively. (Amareshwari Sriramadasu via vinodkv) + + MAPREDUCE-1657. After task logs directory is deleted, tasklog servlet + displays wrong error message about job ACLs. (Ravi Gummadi via vinodkv) + + MAPREDUCE-1727. TestJobACLs fails after HADOOP-6686. (Ravi Gummadi via vinodkv) + + MAPREDUCE-1611. Refresh nodes and refresh queues doesnt work with service + authorization enabled. (Amar Kamat via vinodkv) + + MAPREDUCE-1276. Correct flaws in the shuffle related to connection setup + and failure attribution. (Amareshwari Sriramadasu via cdouglas) + + MAPREDUCE-1372. ConcurrentModificationException in JobInProgress. + (Dick King and Amareshwari Sriramadasu via tomwhite) + + MAPREDUCE-118. Fix Job.getJobID(). (Amareshwari Sriramadasu via sharad) + + MAPREDUCE-913. TaskRunner crashes with NPE resulting in held up slots, + UNINITIALIZED tasks and hung TaskTracker. (Amareshwari Sriramadasu and + Sreekanth Ramakrishnan via vinodkv) + + MAPREDUCE-1725. Fix MapReduce API incompatibilities between 0.20 and 0.21. + (tomwhite) + + MAPREDUCE-1606. TestJobACLs may timeout as there are no slots for launching + JOB_CLEANUP task. (Ravi Gummadi via vinodkv) + + MAPREDUCE-1765. Correct streaming documentation for StreamXmlRecordReader. + (Corinne Chandel via amareshwari) + + MAPREDUCE-1880. Fix BigDecimal.divide(..) in the pi example. (szetszwo) + + MAPREDUCE-1885. Revert FileSystem create method that takes CreateFlags + (MapReduce part of HADOOP-6826). (Ravi Gummadi via tomwhite) + + MAPREDUCE-1870. Harmonize MapReduce JAR library versions with Common and + HDFS. (tomwhite) + + MAPREDUCE-1791. Remote cluster control functionality needs JavaDocs + improvement (Konstantin Boudnik) + + MAPREDUCE-1942. 'compile-fault-inject' should never be called directly. + (Konstantin Boudnik) + + MAPREDUCE-1876. Fixes TaskAttemptStartedEvent to correctly log event type + for all task types. (Amar Kamat via amareshwari) + + MAPREDUCE-1926. MapReduce distribution is missing build-utils.xml. + (tomwhite) + + MAPREDUCE-2012. Some contrib tests fail in branch 0.21 and trunk. + (Amareshwari Sriramadasu via tomwhite) + + MAPREDUCE-1980. Fixes TaskAttemptUnsuccessfulCompletionEvent and + TaskAttemptFinishedEvent to correctly log event type for all task types. + (Amar Kamat via amareshwari) + + MAPREDUCE-1856. 
Extract a subset of tests for smoke (DOA) validation (cos) + diff --git a/aarch64/share/doc/hadoop/mapreduce/LICENSE.txt b/aarch64/share/doc/hadoop/mapreduce/LICENSE.txt new file mode 100644 index 0000000..59bcdbc --- /dev/null +++ b/aarch64/share/doc/hadoop/mapreduce/LICENSE.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. 
Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ diff --git a/aarch64/share/doc/hadoop/mapreduce/NOTICE.txt b/aarch64/share/doc/hadoop/mapreduce/NOTICE.txt new file mode 100644 index 0000000..62fc581 --- /dev/null +++ b/aarch64/share/doc/hadoop/mapreduce/NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). diff --git a/aarch64/share/doc/hadoop/yarn/CHANGES.txt b/aarch64/share/doc/hadoop/yarn/CHANGES.txt new file mode 100644 index 0000000..b2ff194 --- /dev/null +++ b/aarch64/share/doc/hadoop/yarn/CHANGES.txt @@ -0,0 +1,1698 @@ +Hadoop YARN Change Log + +Release 2.2.0 - 2013-10-13 + + INCOMPATIBLE CHANGES + + YARN-1229. Define constraints on Auxiliary Service names. Change + ShuffleHandler service name from mapreduce.shuffle to + mapreduce_shuffle (Xuan Gong via sseth) + + NEW FEATURES + + IMPROVEMENTS + + YARN-1246. Added application finish-status to ApplicationSummary for the sake + of testing given ApplicationHistoryServer is not yet ready. (Arpit Gupta via + vinodkv) + + YARN-899. Added back queue level administrator-acls so that there is no + regression w.r.t 1.x. 
(Xuan Gong via vinodkv) + + YARN-1228. Clean up Fair Scheduler configuration loading. (Sandy Ryza) + + YARN-1213. Restore config to ban submitting to undeclared pools in the + Fair Scheduler. (Sandy Ryza) + + YARN-1277. Added a policy based configuration for http/https in common + HttpServer and using the same in YARN - related to per project https config + support via HADOOP-10022. (Suresh Srinivas and Omkar Vinit Joshi via vinodkv) + + OPTIMIZATIONS + + BUG FIXES + + YARN-1128. FifoPolicy.computeShares throws NPE on empty list of Schedulables + (Karthik Kambatla via Sandy Ryza) + + YARN-1214. Register ClientToken MasterKey in SecretManager after it is + saved (Jian He via bikas) + + YARN-49. Improve distributed shell application to work on a secure cluster. + (Vinod Kumar Vavilapalli via hitesh) + + YARN-1157. Fixed ResourceManager UI to behave correctly when apps like + distributed-shell do not set tracking urls. (Xuan Gong via vinodkv) + + YARN-1221. With Fair Scheduler, reserved MB reported in RM web UI increases + indefinitely (Siqi Li via Sandy Ryza) + + YARN-1247. test-container-executor has gotten out of sync with the changes to + container-executor. (rvs via tucu) + + YARN-1070. Fixed race conditions in NodeManager during container-kill. + (Zhijie Shen via vinodkv) + + YARN-1215. Yarn URL should include userinfo. (Chuan Liu via cnauroth) + + YARN-1262. TestApplicationCleanup relies on all containers assigned in a + single heartbeat (Karthik Kambatla via Sandy Ryza) + + YARN-1260. Added webapp.http.address to yarn-default.xml so that default + install with https enabled doesn't have broken link on NM UI. (Omkar Vinit + Joshi via vinodkv) + + YARN-1141. Updating resource requests should be decoupled with updating + blacklist (Zhijie Shen via bikas) + + YARN-876. Node resource is added twice when node comes back from unhealthy + to healthy. (Peng Zhang via Sandy Ryza) + + YARN-890. Ensure CapacityScheduler doesn't round-up metric for available + resources. (Xuan Gong & Hitesh Shah via acmurthy) + + YARN-621. Changed YARN web app to not add paths that can cause duplicate + additions of authenticated filters there by causing kerberos replay errors. + (Omkar Vinit Joshi via vinodkv) + + YARN-1236. FairScheduler setting queue name in RMApp is not working. + (Sandy Ryza) + + YARN-1256. NM silently ignores non-existent service in + StartContainerRequest (Xuan Gong via bikas) + + YARN-1149. NM throws InvalidStateTransitonException: Invalid event: + APPLICATION_LOG_HANDLING_FINISHED at RUNNING (Xuan Gong via hitesh) + + YARN-1271. "Text file busy" errors launching containers again + (Sandy Ryza) + + YARN-1131. $yarn logs command should return an appropriate error message if + YARN application is still running. (Siddharth Seth via hitesh) + + YARN-1219. FSDownload changes file suffix making FileUtil.unTar() throw + exception. (Shanyu Zhao via cnauroth) + + YARN-1251. TestDistributedShell#TestDSShell failed with timeout. (Xuan Gong + via hitesh) + + YARN-1167. Fixed Distributed Shell to not incorrectly show empty hostname + on RM UI. (Xuan Gong via vinodkv) + + YARN-1254. Fixed NodeManager to not pollute container's credentials. (Omkar + Vinit Joshi via vinodkv) + + YARN-1273. Fixed Distributed-shell to account for containers that failed + to start. (Hitesh Shah via vinodkv) + + YARN-1032. Fixed NPE in RackResolver. (Lohit Vijayarenu via acmurthy) + + YARN-1090. Fixed CS UI to better reflect applications as non-schedulable + and not as pending. (Jian He via acmurthy) + + YARN-1274. 
Fixed NodeManager's LinuxContainerExecutor to create user, app-dir + and log-dirs correctly even when there are no resources to localize for the + container. (Siddharth Seth via vinodkv) + + YARN-1278. Fixed NodeManager to not delete local resources for apps on resync + command from RM - a bug caused by YARN-1149. (Hitesh Shah via vinodkv) + +Release 2.1.1-beta - 2013-09-23 + + INCOMPATIBLE CHANGES + + YARN-707. Added user information also in the YARN ClientToken so that AMs + can implement authorization based on incoming users. (Jason Lowe via vinodkv) + + NEW FEATURES + + IMPROVEMENTS + + YARN-589. Expose a REST API for monitoring the fair scheduler (Sandy Ryza). + + YARN-1074. Cleaned up YARN CLI application list to only display running + applications by default. (Xuan Gong via vinodkv) + + YARN-1093. Corrections to Fair Scheduler documentation (Wing Yew Poon via + Sandy Ryza) + + YARN-942. In Fair Scheduler documentation, inconsistency on which + properties have prefix (Akira Ajisaka via Sandy Ryza) + + YARN-905. Add state filters to nodes CLI (Wei Yan via Sandy Ryza) + + YARN-1083. Changed ResourceManager to fail when the expiry interval is less + than the configured node-heartbeat interval. (Zhijie Shen via vinodkv) + + YARN-1081. Made a trivial change to YARN node CLI header to avoid potential + confusion. (Akira AJISAKA via vinodkv) + + YARN-1034. Remove "experimental" in the Fair Scheduler documentation. + (Karthik Kambatla via Sandy Ryza) + + YARN-1080. Improved help message for "yarn logs" command. (Xuan Gong via + vinodkv) + + YARN-771. AMRMClient support for resource blacklisting (Junping Du via + bikas) + + YARN-1117. Improved help messages for "yarn application" and "yarn node" + commands. (Xuan Gong via vinodkv) + + YARN-758. Augment MockNM to use multiple cores (Karthik Kambatla via + Sandy Ryza) + + YARN-1120. Made ApplicationConstants.Environment.USER definition OS neutral + as the corresponding value is now set correctly end-to-end. (Chuan Liu via + vinodkv) + + YARN-1124. Modified YARN CLI application list to display new and submitted + applications together with running apps by default, following up YARN-1074. + (Xuan Gong via vinodkv) + + YARN-1065. NM should provide AuxillaryService data to the container (Xuan + Gong via bikas) + + YARN-696. Changed RMWebservice apps call to take in multiple application + states. (Trevor Lorimer via vinodkv) + + YARN-910. Augmented auxiliary services to listen for container starts and + completions in addition to application events. (Alejandro Abdelnur via + vinodkv) + + YARN-1137. Add support whitelist for system users to Yarn + container-executor.c. (rvs via tucu) + + YARN-1001. Added a web-service to get statistics about per application-type + per state for consumption by downstream projects. (Zhijie Shen via vinodkv) + + YARN-1203. Changed YARN web-app proxy to handle http and https URLs from + AM registration and finish correctly. (Omkar Vinit Joshi via vinodkv) + + YARN-1204. Added separate configuration properties for https for RM and NM + without which servers enabled with https will also start on http ports. + (Omkar Vinit Joshi via vinodkv) + + OPTIMIZATIONS + + BUG FIXES + + YARN-948. Changed ResourceManager to validate the release container list + before actually releasing them. (Omkar Vinit Joshi via vinodkv) + + YARN-966. Fixed ContainerLaunch to not fail quietly when there are no + localized resources due to some other failure. (Zhijie Shen via vinodkv) + + YARN-502. 
Fixed a state machine issue with RMNode inside ResourceManager + which was crashing scheduler. (Mayank Bansal via vinodkv) + + YARN-573. Shared data structures in Public Localizer and Private Localizer + are not Thread safe. (Omkar Vinit Joshi via jlowe) + + YARN-903. Changed ContainerManager to suppress unnecessary warnings when + stopping already stopped containers. (Omkar Vinit Joshi via vinodkv) + + YARN-906. Fixed a bug in NodeManager where cancelling ContainerLaunch at + KILLING state causes that the container to hang. (Zhijie Shen via vinodkv) + + YARN-994. HeartBeat thread in AMRMClientAsync does not handle runtime + exception correctly (Xuan Gong via bikas) + + YARN-337. RM handles killed application tracking URL poorly (jlowe) + + YARN-107. Fixed ResourceManager and clients to better handle + forceKillApplication on non-running and finished applications. (Xuan Gong + via vinodkv) + + YARN-643. Fixed ResourceManager to remove all tokens consistently on app + finish. (Xuan Gong via vinodkv) + + YARN-1006. Fixed broken rendering in the Nodes list web page on the RM web + UI. (Xuan Gong via vinodkv) + + YARN-881. Priority#compareTo method seems to be wrong. (Jian He via bikas) + + YARN-1082. Create base directories on HDFS after RM login to ensure RM + recovery doesn't fail in secure mode. (vinodkv via acmurthy) + + YARN-1085. Modified YARN and MR2 web-apps to do HTTP authentication in + secure setup with kerberos. (Omkar Vinit Joshi via vinodkv) + + YARN-1094. Fixed a blocker with RM restart code because of which RM crashes + when try to recover an existing app. (vinodkv) + + YARN-1008. MiniYARNCluster with multiple nodemanagers, all nodes have same + key for allocations. (tucu) + + YARN-602. Fixed NodeManager to not let users override some mandatory + environmental variables. (Kenji Kikushima via vinodkv) + + YARN-1101. Active nodes can be decremented below 0 (Robert Parker + via tgraves) + + YARN-981. Fixed YARN webapp so that /logs servlet works like before. (Jian He + via vinodkv) + + YARN-1077. Fixed TestContainerLaunch test failure on Windows. (Chuan Liu via + vinodkv) + + YARN-957. Fixed a bug in CapacityScheduler because of which requests that + need more than a node's total capability were incorrectly allocated on that + node causing apps to hang. (Omkar Vinit Joshi via vinodkv) + + YARN-1107. Fixed a bug in ResourceManager because of which RM in secure mode + fails to restart. (Omkar Vinit Joshi via vinodkv) + + YARN-1049. ContainerExistStatus should define a status for preempted + containers. (tucu) + + YARN-1144. Unmanaged AMs registering a tracking URI should not be + proxy-fied. (tucu) + + YARN-1152. Fixed a bug in ResourceManager that was causing clients to get + invalid client token key errors when an appliation is about to finish. + (Jason Lowe via vinodkv) + + YARN-292. Fixed FifoScheduler and FairScheduler to make their applications + data structures thread safe to avoid RM crashing with + ArrayIndexOutOfBoundsException. (Zhijie Shen via vinodkv) + + YARN-1025. ResourceManager and NodeManager do not load native libraries on + Windows. (cnauroth) + + YARN-1176. RM web services ClusterMetricsInfo total nodes doesn't include + unhealthy nodes (Jonathan Eagles via tgraves) + + YARN-1078. TestNodeManagerResync, TestNodeManagerShutdown, and + TestNodeStatusUpdater fail on Windows. (Chuan Liu via cnauroth) + + YARN-1194. TestContainerLogsPage fails with native builds (Roman Shaposhnik + via jlowe) + + YARN-1116. 
Populate AMRMTokens back to AMRMTokenSecretManager after RM + restarts (Jian He via bikas) + + YARN-1189. NMTokenSecretManagerInNM is not being told when applications + have finished (Omkar Vinit Joshi via jlowe) + + YARN-540. Race condition causing RM to potentially relaunch already + unregistered AMs on RM restart (Jian He via bikas) + + YARN-1184. ClassCastException during preemption enforcement. (cdouglas) + +Release 2.1.0-beta - 2013-08-22 + + INCOMPATIBLE CHANGES + + YARN-396. Rationalize AllocateResponse in RM Scheduler API. (Zhijie Shen + via hitesh) + + YARN-439. Flatten NodeHeartbeatResponse. (Xuan Gong via sseth) + + YARN-440. Flatten RegisterNodeManagerResponse. (Xuan Gong via sseth) + + YARN-536. Removed the unused objects ContainerStatus and ContainerStatus from + Container which also don't belong to the container. (Xuan Gong via vinodkv) + + YARN-486. Changed NM's startContainer API to accept Container record given by + RM as a direct parameter instead of as part of the ContainerLaunchContext + record. (Xuan Gong via vinodkv) + + YARN-444. Moved special container exit codes from YarnConfiguration to API + where they belong. (Sandy Ryza via vinodkv) + + YARN-441. Removed unused utility methods for collections from two API + records. (Xuan Gong via vinodkv) + + YARN-561. Modified NodeManager to set key information into the environment + of every container that it launches. (Xuan Gong via vinodkv) + + YARN-579. Stop setting the Application Token in the AppMaster env, in + favour of the copy present in the container token field. + (Vinod Kumar Vavilapalli via sseth) + + YARN-629. Make YarnRemoteException not be rooted at IOException. (Xuan Gong + via vinodkv) + + YARN-633. Changed RMAdminProtocol api to throw IOException and + YarnRemoteException. (Xuan Gong via vinodkv) + + YARN-632. Changed ContainerManager api to throw IOException and + YarnRemoteException. (Xuan Gong via vinodkv) + + YARN-631. Changed ClientRMProtocol api to throw IOException and + YarnRemoteException. (Xuan Gong via vinodkv) + + YARN-630. Changed AMRMProtocol api to throw IOException and + YarnRemoteException. (Xuan Gong via vinodkv) + + YARN-615. Rename ContainerLaunchContext.containerTokens to tokens. + (Vinod Kumar Vavilapalli via sseth) + + YARN-571. Remove user from ContainerLaunchContext. (Omkar Vinit Joshi via + vinodkv) + + YARN-716. Making ApplicationID immutable. (Siddharth Seth via vinodkv) + + YARN-684. ContainerManager.startContainer should use + ContainerTokenIdentifier instead of the entire Container. + (Vinod Kumar Vavilapalli via sseth) + + YARN-735. Make ApplicationAttemptId, ContaienrId and NodeId immutable. + (Jian He via sseth) + + YARN-749. Rename ResourceRequest.(get,set)HostName to + ResourceRequest.(get,set)ResourceName. (acmurthy) + + YARN-720. container-log4j.properties should not refer to mapreduce + property names. (Zhijie Shen via sseth) + + YARN-748. Moved BuilderUtils from yarn-common to yarn-server-common for + eventual retirement. (Jian He via vinodkv) + + YARN-635. Renamed YarnRemoteException to YarnException. (Siddharth Seth via + vinodkv) + + YARN-755. Renamed AllocateResponse.reboot to AllocateResponse.resync. (Bikas + Saha via vinodkv) + + YARN-753. Added individual factory methods for all api protocol records and + converted the records to be abstract classes. (Jian He via vinodkv) + + YARN-724. Moved ProtoBase from api.records to api.records.impl.pb. (Jian He + via vinodkv) + + YARN-759. Create Command enum in AllocateResponse (bikas) + + YARN-777. 
Removed unreferenced objects from .proto files. (Jian He via + vinodkv) + + YARN-642. Removed health parameter from ResourceManager /nodes web-service + and cleaned the behaviour of the status parameter. (Sandy Ryza vid vinodkv) + + YARN-530. Defined Service model strictly, implemented AbstractService for + robust subclassing and migrated yarn-common services. (Steve Loughran via + vinodkv) + + YARN-746. Renamed Service.register() and Service.unregister() to + registerServiceListener() & unregisterServiceListener() respectively. + (Steve Loughran via vinodkv) + + YARN-792. Moved NodeHealthStatus from yarn.api.record to + yarn.server.api.record. (Jian He via vinodkv) + + YARN-806. Moved ContainerExitStatus from yarn.api to yarn.api.records. (Jian + He via vinodkv) + + YARN-821. Renamed setFinishApplicationStatus to setFinalApplicationStatus in + FinishApplicationMasterRequest for consistency. (Jian He via vinodkv) + + YARN-787. Removed minimum resource from RegisterApplicationMasterResponse. + (tucu via acmurthy) + + YARN-829. Renamed RMTokenSelector to be RMDelegationTokenSelector. (Zhijie + Shen via vinodkv) + + YARN-828. Removed the unsed YarnVersionAnnotation. (Zhijie Shen via vinodkv) + + YARN-823. Moved RMAdmin from yarn.client to yarn.client.cli and renamed it to + be RMAdminCLI. (Jian He via vinodkv) + + YARN-387. Renamed YARN protocols for consistency. + ClientRMProtocol -> ApplicationClientProtocol + AMRMProtocol -> ApplicationMasterProtocol + ContainerManager -> ContainerManagementProtocol + (vinodkv via acmurthy) + + YARN-831. Removed minimum resource from GetNewApplicationResponse as a + follow-up to YARN-787. (Jian He via acmurthy) + + YARN-824. Added static factory methods to hadoop-yarn-client interfaces. + (Jian He via acmurthy) + + YARN-826. Moved Clock and SystemClock into yarn.util package. (Zhijie Shen + via vinodkv) + + YARN-837. Moved yarn.ClusterInfo into MapReduce project as it doesn't belong + to YARN. (Zhijie Shen via vinodkv) + + YARN-822. Renamed ApplicationToken to be AMRMToken, and similarly the + corresponding TokenSelector and SecretManager. (Omkar Vinit Joshi via vinodkv) + + YARN-610. ClientToken is no longer set in the environment of the Containers. + (Omkar Vinit Joshi via vinodkv) + + YARN-834. Fixed annotations for yarn-client module, reorganized packages and + clearly differentiated *Async apis. (Arun C Murthy and Zhijie Shen via + vinodkv) + + YARN-840. Moved ProtoUtils to yarn.api.records.pb.impl. (Jian He via + acmurthy) + + YARN-841. Move Auxiliary service to yarn-api, annotate and document it. + (vinodkv) + + YARN-850. Rename getClusterAvailableResources to getAvailableResources in + AMRMClients (Jian He via bikas) + + YARN-694. Starting to use NMTokens to authenticate all communication with + NodeManagers. (Omkar Vinit Joshi via vinodkv) + + YARN-553. Replaced YarnClient.getNewApplication with + YarnClient.createApplication which provides a directly usable + ApplicationSubmissionContext to simplify the api. (Karthik Kambatla via + acmurthy) + + YARN-851. Share NMTokens using NMTokenCache (api-based) between AMRMClient + and NMClient instead of memory based approach which is used currently. (Omkar + Vinit Joshi via vinodkv) + + YARN-869. Move ResourceManagerAdministrationProtocol out of main YARN api. + (vinodkv via acmurthy) + + YARN-791. Changed RM APIs and web-services related to nodes to ensure that + both are consistent with each other. (Sandy Ryza via vinodkv) + + YARN-727. 
ClientRMProtocol.getAllApplications should accept ApplicationType as + a parameter. (Xuan Gong via hitesh) + + YARN-701. Use application tokens irrespective of secure or non-secure + mode. (vinodkv via acmurthy) + + YARN-918. Remove ApplicationAttemptId from + RegisterApplicationMasterRequestProto. (vinodkv via acmurthy) + + YARN-926. Modified ContainerManagerProtcol APIs to take in requests for + multiple containers. (Jian He via vinodkv) + + NEW FEATURES + + YARN-482. FS: Extend SchedulingMode to intermediate queues. + (kkambatl via tucu) + + YARN-45. Add protocol for schedulers to request containers back from + ApplicationMasters. (Carlo Curino, cdouglas) + + YARN-563. Add the concept of an application-type for each application. + (Mayank Bansal via vinodkv) + + HADOOP-8562. Enhancements to support Hadoop on Windows Server and Windows + Azure environments. (See breakdown of tasks below for subtasks and + contributors) + + YARN-422. Add a NM Client library to help application-writers. (Zhijie Shen + via vinodkv) + + YARN-392. Make it possible to specify hard locality constraints in resource + requests. (sandyr via tucu) + + YARN-326. Add multi-resource scheduling to the fair scheduler. + (sandyr via tucu) + + YARN-398. Make it possible to specify hard locality constraints in resource + requests for CapacityScheduler. (acmurthy) + + YARN-781. Exposing LOGDIR in all containers' environment which should be used + by containers for logging purposes. (Jian He via vinodkv) + + IMPROVEMENTS + + YARN-347. Node CLI should show CPU info besides memory in node status. + (Junping Du via llu) + + YARN-365. Change NM heartbeat handling to not generate a scheduler event + on each heartbeat. (Xuan Gong via sseth) + + YARN-380. Fix yarn node -status output to be better readable. (Omkar Vinit + Joshi via vinodkv) + + YARN-410. Fixed RM UI so that the new lines diagnostics for a failed app on + the per-application page are translated to html line breaks. (Omkar Vinit + Joshi via vinodkv) + + YARN-198. Added a link to RM pages from the NodeManager web app. (Jian He + via vinodkv) + + YARN-237. Refreshing the RM page forgets how many rows I had in my + Datatables (jian he via bobby) + + YARN-481. Add AM Host and RPC Port to ApplicationCLI Status Output + (Chris Riccomini via bikas) + + YARN-297. Improve hashCode implementations for PB records. (Xuan Gong via + hitesh) + + YARN-417. Create AMRMClient wrapper that provides asynchronous callbacks. + (Sandy Ryza via bikas) + + YARN-497. Yarn unmanaged-am launcher jar does not define a main class in + its manifest (Hitesh Shah via bikas) + + YARN-469. Make scheduling mode in FS pluggable. (kkambatl via tucu) + + YARN-450. Define value for * in the scheduling protocol (Zhijie Shen via + bikas) + + YARN-475. Remove a unused constant in the public API - + ApplicationConstants.AM_APP_ATTEMPT_ID_ENV. (Hitesh Shah via vinodkv) + + YARN-309. Changed NodeManager to obtain heart-beat interval from the + ResourceManager. (Xuan Gong via vinodkv) + + YARN-447. Move ApplicationComparator in CapacityScheduler to use comparator + in ApplicationId. (Nemon Lou via vinodkv) + + YARN-381. Improve fair scheduler docs. (Sandy Ryza via tomwhite) + + YARN-458. YARN daemon addresses must be placed in many different configs. + (sandyr via tucu) + + YARN-193. Scheduler.normalizeRequest does not account for allocation + requests that exceed maximumAllocation limits (Zhijie Shen via bikas) + + YARN-479. 
NM retry behavior for connection to RM should be similar for + lost heartbeats (Jian He via bikas) + + YARN-495. Changed NM reboot behaviour to be a simple resync - kill all + containers and re-register with RM. (Jian He via vinodkv) + + YARN-514. Delayed store operations should not result in RM unavailability + for app submission (Zhijie Shen via bikas) + + YARN-586. Fixed a typo in ApplicationSubmissionContext#setApplicationId. + (Zhijie Shen via vinodkv) + + YARN-542. Changed the default global AM max-attempts value to be not one. + (Zhijie Shen via vinodkv) + + YARN-583. Moved application level local resources to be localized under the + filecache sub-directory under application directory. (Omkar Vinit Joshi via + vinodkv) + + YARN-581. Added a test to verify that app delegation tokens are restored + after RM restart. (Jian He via vinodkv) + + YARN-577. Add application-progress also to ApplicationReport. (Hitesh Shah + via vinodkv) + + YARN-595. Refactor fair scheduler to use common Resources. (Sandy Ryza + via tomwhite) + + YARN-562. Modified NM to reject any containers allocated by a previous + ResourceManager. (Jian He via vinodkv) + + YARN-591. Moved RM recovery related records out of public API as they do not + belong there. (vinodkv) + + YARN-599. Refactoring submitApplication in ClientRMService and RMAppManager + to separate out various validation checks depending on whether they rely on + RM configuration or not. (Zhijie Shen via vinodkv) + + YARN-618. Modified RM_INVALID_IDENTIFIER to be -1 instead of zero. (Jian He + via vinodkv) + + YARN-625. Move the utility method unwrapAndThrowException from + YarnRemoteExceptionPBImpl to RPCUtil. (Siddharth Seth via vinodkv) + + YARN-645. Moved RMDelegationTokenSecretManager from yarn-server-common to + yarn-server-resourcemanager where it really belongs. (Jian He via vinodkv) + + YARN-651. Changed PBClientImpls of ContainerManager and RMAdmin to throw + IOExceptions also. (Xuan Gong via vinodkv) + + YARN-582. Changed ResourceManager to recover Application token and client + tokens for app attempt so that RM can be restarted while preserving current + applications. (Jian He via vinodkv) + + YARN-568. Add support for work preserving preemption to the FairScheduler. + (Carlo Curino and Sandy Ryza via cdouglas) + + YARN-598. Add virtual cores to queue metrics. (sandyr via tucu) + + YARN-634. Modified YarnRemoteException to be not backed by PB and introduced + a separate SerializedException record. (Siddharth Seth via vinodkv) + + YARN-663. Changed ResourceTracker API and LocalizationProtocol API to throw + YarnRemoteException and IOException. (Xuan Gong via vinodkv) + + YARN-590. Added an optional mesage to be returned by ResourceMaanger when RM + asks an RM to shutdown/resync etc so that NMs can log this message locally + for better debuggability. (Mayank Bansal via vinodkv) + + YARN-617. Made ContainerTokens to be used for validation at NodeManager + also in unsecure mode to prevent AMs from faking resource requirements in + unsecure mode. (Omkar Vinit Joshi via vinodkv) + + YARN-708. Moved RecordFactory classes to hadoop-yarn-api, and put some + miscellaneous fixes to the interfaces. (Siddharth Seth via vinodkv) + + YARN-711. Copied BuilderUtil methods in individual API records as + BuilderUtils is going to be dismantled. (Jian He via vinodkv) + + YARN-714. Added NMTokens to be sent to AMs as part of heart-beat response. + (Omkar Vinit Joshi via vinodkv) + + YARN-638. 
Modified ResourceManager to restore RMDelegationTokens after + restarting. (Jian He via vinodkv) + + YARN-660. Improve AMRMClient with matching requests (bikas) + + YARN-717. Put object creation factories for Token in the class itself and + remove useless derivations for specific tokens. (Jian He via vinodkv) + + YARN-756. Move Preemption* records to yarn.api where they really belong. + (Jian He via vinodkv) + + YARN-750. Allow for black-listing resources in YARN API and Impl in CS + (acmurthy via bikas) + + YARN-877. Support resource blacklisting for FifoScheduler. + (Junping Du via llu) + + YARN-686. Flatten NodeReport. (sandyr via tucu) + + YARN-737. Throw some specific exceptions directly instead of wrapping them + in YarnException. (Jian He via sseth) + + YARN-731. RPCUtil.unwrapAndThrowException should unwrap remote + RuntimeExceptions. (Zhijie Shen via sseth) + + YARN-600. Hook up cgroups CPU settings to the number of virtual cores + allocated. (sandyr via tucu) + + YARN-648. FS: Add documentation for pluggable policy. (kkambatl via tucu) + + YARN-773. Moved YarnRuntimeException from package api.yarn to + api.yarn.exceptions. (Jian He via vinodkv) + + YARN-692. Creating NMToken master key on RM and sharing it with NM as a part + of RM-NM heartbeat. (Omkar Vinit Joshi via vinodkv) + + YARN-782. vcores-pcores ratio functions differently from vmem-pmem ratio in + misleading way. (sandyr via tucu) + + YARN-803. factor out scheduler config validation from the ResourceManager + to each scheduler implementation. (tucu) + + YARN-789. Enable zero capabilities resource requests in fair scheduler. + (tucu) + + YARN-639. Modified Distributed Shell application to start using the new + NMClient library. (Zhijie Shen via vinodkv) + + YARN-693. Modified RM to send NMTokens on allocate call so that AMs can then + use them for authentication with NMs. (Omkar Vinit Joshi via vinodkv) + + YARN-752. In AMRMClient, automatically add corresponding rack requests for + requested nodes. (sandyr via tucu) + + YARN-825. Fixed javadoc and annotations for yarn-common module. (vinodkv) + + YARN-833. Moved Graph and VisualizeStateMachine into yarn.state package. + (Zhijie Shen via vinodkv) + + YARN-805. Fix javadoc and annotations on classes in the yarn-api + package. (Jian He via sseth) + + YARN-846. Move pb Impl classes from yarn-api to yarn-common. (Jian He via + vinodkv) + + YARN-827. Need to make Resource arithmetic methods accessible (Jian He via + bikas) + + YARN-866. Add test for class ResourceWeights. (ywskycn via tucu) + + YARN-736. Add a multi-resource fair sharing metric. (sandyr via tucu) + + YARN-883. Expose Fair Scheduler-specific queue metrics. (sandyr via tucu) + + YARN-569. Add support for requesting and enforcing preemption requests via + a capacity monitor. (Carlo Curino, cdouglas) + + YARN-521. Augment AM - RM client module to be able to request containers + only at specific locations (Sandy Ryza via bikas) + + YARN-513. Create common proxy client for communicating with RM. (Xuan Gong + & Jian He via bikas) + + YARN-927. Change ContainerRequest to not have more than 1 container count + and remove StoreContainerRequest (bikas) + + YARN-922. Change FileSystemRMStateStore to use directories (Jian He via + bikas) + + YARN-865. RM webservices can't query based on application Types. (Xuan Gong + via hitesh) + + YARN-912. Move client facing exceptions to yarn-api module. (Mayank Bansal + via vinodkv) + + YARN-84. Use Builder to build RPC server. (Brandon Li via szetszwo) + + YARN-1046. 
Disable mem monitoring by default in MiniYARNCluster. (Karthik + Kambatla via Sandy Ryza) + + YARN-1045. Improve toString implementation for PBImpls. (Jian He via sseth) + + OPTIMIZATIONS + + YARN-512. Log aggregation root directory check is more expensive than it + needs to be. (Maysam Yabandeh via jlowe) + + YARN-719. Move RMIdentifier from Container to ContainerTokenIdentifier. + (Vinod Kumar Vavilapalli via sseth) + + BUG FIXES + + YARN-383. AMRMClientImpl should handle null rmClient in stop() + (Hitesh Shah via sseth) + + YARN-385. Add missing fields - location and #containers to + ResourceRequestPBImpl's toString(). (Sandy Ryza via sseth) + + YARN-377. Use the new StringUtils methods added by HADOOP-9252 and fix + TestContainersMonitor. (Chris Nauroth via szetszwo) + + YARN-391. Formatting fixes for LCEResourceHandler classes. + (Steve Loughran via sseth) + + YARN-390. ApplicationCLI and NodeCLI hard-coded platform-specific line + separator causes test failures on Windows. (Chris Nauroth via suresh) + + YARN-406. Fix TestRackResolver to function in networks where "host1" + resolves to a valid host. (Hitesh Shah via sseth) + + YARN-376. Fixes a bug which would prevent the NM knowing about completed + containers and applications. (Jason Lowe via sseth) + + YARN-196. Nodemanager should be more robust in handling connection failure + to ResourceManager when a cluster is started (Xuan Gong via hitesh) + + YARN-485. TestProcfsProcessTree#testProcessTree() doesn't wait long enough + for the process to die. (kkambatl via tucu) + + YARN-71. Fix the NodeManager to clean up local-dirs on restart. + (Xuan Gong via sseth) + + YARN-378. Fix RM to make the AM max attempts/retries to be configurable + per application by clients. (Zhijie Shen via vinodkv) + + YARN-498. Unmanaged AM launcher does not set various constants in env for + an AM, also does not handle failed AMs properly. (Hitesh Shah via bikas) + + YARN-496. Fair scheduler configs are refreshed inconsistently in + reinitialize. (Sandy Ryza via tomwhite) + + YARN-474. Fix CapacityScheduler to trigger application-activation when + am-resource-percent configuration is refreshed. (Zhijie Shen via vinodkv) + + YARN-209. Fix CapacityScheduler to trigger application-activation when + the cluster capacity changes. (Zhijie Shen via vinodkv) + + YARN-24. Nodemanager fails to start if log aggregation enabled and + namenode unavailable. (sandyr via tucu) + + YARN-515. Node Manager not getting the master key. (Robert Joseph Evans + via jlowe) + + YARN-382. SchedulerUtils improve way normalizeRequest sets the resource + capabilities. (Zhijie Shen via bikas) + + YARN-467. Modify public distributed cache to localize files such that no + local directory hits unix file count limits and thus prevent job failures. + (Omkar Vinit Joshi via vinodkv) + + YARN-101. Fix NodeManager heartbeat processing to not lose track of completed + containers in case of dropped heartbeats. (Xuan Gong via vinodkv) + + YARN-538. RM address DNS lookup can cause unnecessary slowness on every JHS + page load. (sandyr via tucu) + + YARN-532. Change RMAdmin and Localization client protocol PB implementations + to implement closeable so that they can be stopped when needed via + RPC.stopProxy(). (Siddharth Seth via vinodkv) + + YARN-99. Modify private distributed cache to localize files such that no + local directory hits unix file count limits and thus prevent job failures. + (Omkar Vinit Joshi via vinodkv) + + YARN-112. 
Fixed a race condition during localization that fails containers. + (Omkar Vinit Joshi via vinodkv) + + YARN-534. Change RM restart recovery to also account for AM max-attempts + configuration after the restart. (Jian He via vinodkv) + + YARN-539. Addressed memory leak of LocalResource objects NM when a resource + localization fails. (Omkar Vinit Joshi via vinodkv) + + YARN-319. Submitting a job to a fair scheduler queue for which the user + does not have permission causes the client to wait forever. + (shenhong via tomwhite) + + YARN-412. Fixed FifoScheduler to check hostname of a NodeManager rather + than its host:port during scheduling which caused incorrect locality for + containers. (Roger Hoover via acmurthy) + + YARN-500. Fixed YARN webapps to not roll-over ports when explicitly asked + to use non-ephemeral ports. (Kenji Kikushima via vinodkv) + + YARN-518. Fair Scheduler's document link could be added to the hadoop 2.x + main doc page. (sandyr via tucu) + + YARN-476. ProcfsBasedProcessTree info message confuses users. + (sandyr via tucu) + + YARN-585. Fix failure in TestFairScheduler#testNotAllowSubmitApplication + caused by YARN-514. (Zhijie Shen via vinodkv) + + YARN-547. Fixed race conditions in public and private resource localization + which used to cause duplicate downloads. (Omkar Vinit Joshi via vinodkv) + + YARN-594. Update test and add comments in YARN-534 (Jian He via bikas) + + YARN-549. YarnClient.submitApplication should wait for application to be + accepted by the RM (Zhijie Shen via bikas) + + YARN-605. Fix failing unit test in TestNMWebServices when versionInfo has + parantheses like when running on a git checkout. (Hitesh Shah via vinodkv) + + YARN-289. Fair scheduler allows reservations that won't fit on node. + (Sandy Ryza via tomwhite) + + YARN-576. Modified ResourceManager to reject NodeManagers that don't satisy + minimum resource requirements. (Kenji Kikushima via vinodkv) + + YARN-646. Fix two typos in Fair Scheduler user guide. (Dapeng Sun via atm) + + YARN-507. Add interface visibility and stability annotations to FS + interfaces/classes. (kkambatl via tucu) + + YARN-637. FS: maxAssign is not honored. (kkambatl via tucu) + + YARN-655. Fair scheduler metrics should subtract allocated memory from + available memory. (sandyr via tucu) + + YARN-628. Fix the way YarnRemoteException is being unrolled to extract out + the underlying exception. (Siddharth Seth via vinodkv) + + YARN-695. Remove masterContainer and status unused fields from + ApplicationReportProto and fix bugs in ApplicationReportPBImpl. (Zhijie Shen + via vinodkv) + + YARN-706. Fixed race conditions in TestFSDownload. (Zhijie Shen via vinodkv). + + YARN-715. Fixed unit test failures - TestDistributedShell and + TestUnmanagedAMLauncher. (Vinod Kumar Vavilapalli via sseth) + + YARN-578. Fixed NM to use SecureIOUtils for reading and aggregating logs. + (Omkar Vinit Joshi via vinodkv) + + YARN-733. Fixed TestNMClient from failing occasionally. (Zhijie Shen via + vinodkv) + + YARN-730. Fix NMClientAsync to remove completed containers. (Zhijie Shen + via acmurthy) + + YARN-726. Fix queue & finish time fields in web-ui for ResourceManager. + (Mayank Bansal via acmurthy) + + YARN-757. Changed TestRMRestart to use the default scheduler to avoid test + failures. (Bikas Saha via vinodkv) + + YARN-742. Log aggregation causes a lot of redundant setPermission calls. + (jlowe via kihwal) + + YARN-764. blank Used Resources on Capacity Scheduler page (Nemon Lou via + tgraves) + + YARN-761. 
TestNMClientAsync fails sometimes (Zhijie Shen via bikas) + + YARN-760. NodeManager throws AvroRuntimeException on failed start. + (Niranjan Singh via jlowe) + + YARN-767. Initialize application metrics at RM bootup. (Jian He via + acmurthy) + + YARN-700. TestInfoBlock fails on Windows because of line ending missmatch. + (Ivan Mitic via cnauroth) + + YARN-117. Migrated rest of YARN to the new service model. (Steve Louhran via + vinodkv) + + YARN-812. Set default logger for application summary logger to + hadoop.root.logger. (sseth via acmurthy) + + YARN-848. Nodemanager does not register with RM using the fully qualified + hostname. (Hitesh Shah via sseth) + + YARN-854. Fixing YARN bugs that are failing applications in secure + environment. (Omkar Vinit Joshi via vinodkv) + + YARN-861. TestContainerManager is failing. (Vinod Kumar Vavilapalli via + hitesh) + + YARN-874. Making common RPC to switch to not switch to simple when other + mechanisms are enabled and thus fix YARN/MR test failures after HADOOP-9421. + (Daryn Sharp and Vinod Kumar Vavilapalli via vinodkv) + + YARN-845. RM crash with NPE on NODE_UPDATE (Mayank Bansal via bikas) + + YARN-369. Handle ( or throw a proper error when receiving) status updates + from application masters that have not registered (Mayank Bansal & + Abhishek Kapoor via bikas) + + YARN-541. getAllocatedContainers() is not returning all the allocated + containers (bikas) + + YARN-763. AMRMClientAsync should stop heartbeating after receiving + shutdown from RM (Xuan Gong via bikas) + + YARN-654. AMRMClient: Perform sanity checks for parameters of public + methods (Xuan Gong via bikas)" + + YARN-919. Document setting default heap sizes in yarn-env.sh (Mayank + Bansal via hitesh) + + YARN-795. Fair scheduler queue metrics should subtract allocated vCores from + available vCores. (ywskycn via tucu) + + YARN-799. Fix CgroupsLCEResourcesHandler to use /tasks instead of + /cgroup.procs. (Chris Riccomini via acmurthy) + + YARN-333. Schedulers cannot control the queue-name of an + application. (sandyr via tucu) + + YARN-368. Fixed a typo in error message in Auxiliary services. (Albert Chu + via vinodkv) + + YARN-295. Fixed a race condition in ResourceManager RMAppAttempt state + machine. (Mayank Bansal via vinodkv) + + YARN-523. Modified a test-case to validate container diagnostics on + localization failures. (Jian He via vinodkv) + + YARN-661. Fixed NM to cleanup users' local directories correctly when + starting up. (Omkar Vinit Joshi via vinodkv) + + YARN-820. Fixed an invalid state transition in NodeManager caused by failing + resource localization. (Mayank Bansal via vinodkv) + + YARN-62. Modified NodeManagers to avoid AMs from abusing container tokens for + repetitive container launches. (Omkar Vinit Joshi via vinodkv) + + YARN-814. Improving diagnostics when containers fail during launch due to + various reasons like invalid env etc. (Jian He via vinodkv) + + YARN-897. Ensure child queues are ordered correctly to account for + completed containers. (Djellel Eddine Difallah via acmurthy) + + YARN-853. Fixed CapacityScheduler's maximum-am-resource-percent to properly + work beyond refreshing queues. (Devaraj K via vinodkv) + + YARN-873. YARNClient.getApplicationReport(unknownAppId) returns a null + report (Xuan Gong via bikas) + + YARN-875. Application can hang if AMRMClientAsync callback thread has + exception (Xuan Gong via bikas) + + YARN-461. Fair scheduler should not accept apps with empty string queue name. + (ywskycn via tucu) + + YARN-968. 
RM admin commands don't work. (vinodkv via kihwal) + + YARN-688. Fixed NodeManager to properly cleanup containers when it is shut + down. (Jian He via vinodkv) + + YARN-960. Fixed ResourceManager to propagate client-submitted credentials + irrespective of security. (Daryn Sharp via vinodkv) + + YARN-937. Fix unmanaged AM in non-secure/secure setup post YARN-701. (tucu) + + YARN-932. TestResourceLocalizationService.testLocalizationInit can fail on + JDK7. (Karthik Kambatla via Sandy Ryza) + + YARN-961. Changed ContainerManager to enforce Token auth irrespective of + security. (Omkar Vinit Joshi via vinodkv) + + YARN-945. Removed setting of AMRMToken's service from ResourceManager + and changed client libraries do it all the time and correctly. (vinodkv) + + YARN-656. In scheduler UI, including reserved memory in Memory Total can + make it exceed cluster capacity. (Sandy Ryza) + + BREAKDOWN OF HADOOP-8562/YARN-191 SUBTASKS AND RELATED JIRAS + + YARN-158. Yarn creating package-info.java must not depend on sh. + (Chris Nauroth via suresh) + + YARN-176. Some YARN tests fail to find winutils. (Chris Nauroth via suresh) + + YARN-207. YARN distribution build fails on Windows. (Chris Nauroth via + suresh) + + YARN-199. Yarn cmd line scripts for windows. (Ivan Mitic via suresh) + + YARN-213. YARN build script would be more readable using abspath. + (Chris Nauroth via suresh) + + YARN-233. Added support for running containers in MS Windows to YARN. (Chris + Nauroth via acmurthy) + + YARN-234. Added support for process tree and resource calculator in MS Windows + to YARN. (Chris Nauroth via acmurthy) + + YARN-259. Fix LocalDirsHandlerService to use Path rather than URIs. (Xuan + Gong via acmurthy) + + YARN-316. YARN container launch may exceed maximum Windows command line + length due to long classpath. (Chris Nauroth via suresh) + + YARN-359. Fixing commands for container signalling in Windows. (Chris Nauroth + via vinodkv) + + YARN-506. Move to common utils FileUtil#setReadable/Writable/Executable and + FileUtil#canRead/Write/Execute. (Ivan Mitic via suresh) + + YARN-488. TestContainerManagerSecurity fails on Windows. (Chris Nauroth + via hitesh) + + YARN-490. TestDistributedShell fails on Windows. (Chris Nauroth via hitesh) + + YARN-491. TestContainerLogsPage fails on Windows. (Chris Nauroth via hitesh) + + YARN-487. Modify path manipulation in LocalDirsHandlerService to let + TestDiskFailures pass on Windows. (Chris Nauroth via vinodkv) + + YARN-593. container launch on Windows does not correctly populate + classpath with new process's environment variables and localized resources + (Chris Nauroth via bikas) + + YARN-493. Fixed some shell related flaws in YARN on Windows. (Chris Nauroth + via vinodkv) + + YARN-839. TestContainerLaunch.testContainerEnvVariables fails on Windows. + (Chuan Liu via cnauroth) + + YARN-597. TestFSDownload fails on Windows due to dependencies on + tar/gzip/jar tools. (Ivan Mitic via acmurthy) + + YARN-852. TestAggregatedLogFormat.testContainerLogsFileAccess fails on + Windows. (Chuan Liu via cnauroth) + + YARN-894. NodeHealthScriptRunner timeout checking is inaccurate on Windows. + (Chuan Liu via cnauroth) + + YARN-909. Disable TestLinuxContainerExecutorWithMocks on Windows. (Chuan Liu + via cnauroth) + + YARN-1043. Push all metrics consistently. (Jian He via acmurthy) + + YARN-1056. 
Remove dual use of string 'resourcemanager' in + yarn.resourcemanager.connect.{max.wait.secs|retry_interval.secs} + (Karthik Kambatla via acmurthy) + +Release 2.0.5-alpha - 06/06/2013 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 2.0.4-alpha - 2013-04-25 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + YARN-429. capacity-scheduler config missing from yarn-test artifact. + (sseth via hitesh) + + YARN-470. Support a way to disable resource monitoring on the NodeManager. + (Siddharth Seth via hitesh) + +Release 2.0.3-alpha - 2013-02-06 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + YARN-145. Add a Web UI to the fair share scheduler. (Sandy Ryza via tomwhite) + + YARN-3. Add support for CPU isolation/monitoring of containers. + (adferguson via tucu) + + YARN-230. RM Restart phase 1 - includes support for saving/restarting all + applications on an RM bounce. (Bikas Saha via acmurthy) + + YARN-103. Add a yarn AM-RM client module. (Bikas Saha via sseth) + + YARN-286. Add a YARN ApplicationClassLoader. (tomwhite) + + YARN-2. Enhanced CapacityScheduler to account for CPU alongwith memory for + multi-dimensional resource scheduling. (acmurthy) + + YARN-328. Use token request messages defined in hadoop common. (suresh) + + YARN-231. RM Restart - Add FS-based persistent store implementation for + RMStateStore (Bikas Saha via hitesh) + + IMPROVEMENTS + + YARN-223. Update process tree instead of getting new process trees. + (Radim Kolar via llu) + + YARN-57. Allow process-tree based resource calculation et al. to be + pluggable to support it on multiple platforms. (Radim Kolar via acmurthy) + + YARN-78. Changed UnManagedAM application to use YarnClient. (Bikas Saha via + vinodkv) + + YARN-93. Fixed RM to propagate diagnostics from applications that have + finished but failed (Jason Lowe via vinodkv). + + YARN-28. Fixed TestCompositeService to not depend on test-order and thus + made it pass on JDK7 (Thomas Graves via vinodkv). + + YARN-82. Change the default local and log dirs to be based on + hadoop.tmp.dir and yarn.log.dir. (Hemanth Yamijala via sseth) + + YARN-53. Added the missing getGroups API to ResourceManager. (Bo Wang via + vinodkv) + + YARN-116. Add the ability to change the RM include/exclude file without + a restart. (xieguiming and Harsh J via sseth) + + YARN-23. FairScheduler: FSQueueSchedulable#updateDemand() - potential + redundant aggregation. (kkambatl via tucu) + + YARN-127. Move RMAdmin tool to its correct location - the client module. + (vinodkv) + + YARN-40. Provided support for missing YARN commands (Devaraj K and Vinod + Kumar Vavilapalli via vinodkv) + + YARN-33. Change LocalDirsHandlerService to validate the configured local and + log dirs. (Mayank Bansal via sseth) + + YARN-94. Modify DistributedShell to point to main-class by default, clean up + the help message, and hard-code the AM class. (Hitesh Shah via vinodkv) + + YARN-146. Add unit tests for computing fair share in the fair scheduler. + (Sandy Ryza via tomwhite) + + HADOOP-8911. CRLF characters in source and text files. + (Raja Aluri via suresh) + + YARN-136. Make ClientToAMTokenSecretManager part of RMContext (Vinod Kumar + Vavilapalli via sseth) + + YARN-183. Clean up fair scheduler code. (Sandy Ryza via tomwhite) + + YARN-129. Simplify classpath construction for mini YARN tests. (tomwhite) + + YARN-254. Update fair scheduler web UI for hierarchical queues. + (sandyr via tucu) + + YARN-315. 
Using the common security token protobuf definition from hadoop + common. (Suresh Srinivas via vinodkv) + + YARN-170. Change NodeManager stop to be reentrant. (Sandy Ryza via vinodkv) + + YARN-331. Fill in missing fair scheduler documentation. (sandyr via tucu) + + YARN-277. Use AMRMClient in DistributedShell to exemplify the approach. + (Bikas Saha via hitesh) + + YARN-360. Allow apps to concurrently register tokens for renewal. + (Daryn Sharp via sseth) + + OPTIMIZATIONS + + BUG FIXES + + YARN-131. Fix incorrect ACL properties in capacity scheduler documentation. + (Ahmed Radwan via sseth) + + YARN-102. Move the apache header to the top of the file in MemStore.java. + (Devaraj K via sseth) + + YARN-134. ClientToAMSecretManager creates keys without checking for + validity of the appID. (Vinod Kumar Vavilapalli via sseth) + + YARN-30. Fixed tests verifying web-services to work on JDK7. (Thomas Graves + via vinodkv) + + YARN-150. Fixes AppRejectedTransition does not unregister a rejected + app-attempt from the ApplicationMasterService (Bikas Saha via sseth) + + YARN-140. Add capacity-scheduler-default.xml to provide a default set of + configurations for the capacity scheduler. (ahmed via tucu) + + YARN-179. Fix some unit test failures. (Vinod Kumar Vavilapalli via sseth) + + YARN-181. Fixed eclipse settings broken by capacity-scheduler.xml move via + YARN-140. (Siddharth Seth via vinodkv) + + YARN-169. Update log4j.appender.EventCounter to use + org.apache.hadoop.log.metrics.EventCounter (Anthony Rojas via tomwhite) + + YARN-184. Remove unnecessary locking in fair scheduler, and address + findbugs excludes. (sandyr via tucu) + + YARN-224. Fair scheduler logs too many nodeUpdate INFO messages. + (Sandy Ryza via tomwhite) + + YARN-222. Fair scheduler should create queue for each user by default. + (Sandy Ryza via tomwhite) + + MAPREDUCE-4778. Fair scheduler event log is only written if directory + exists on HDFS. (Sandy Ryza via tomwhite) + + YARN-229. Remove old unused RM recovery code. (Bikas Saha via acmurthy) + + YARN-187. Add hierarchical queues to the fair scheduler. + (Sandy Ryza via tomwhite) + + YARN-72. NM should handle cleaning up containers when it shuts down. + (Sandy Ryza via tomwhite) + + YARN-267. Fix fair scheduler web UI. (Sandy Ryza via tomwhite) + + YARN-264. y.s.rm.DelegationTokenRenewer attempts to renew token even + after removing an app. (kkambatl via tucu) + + YARN-271. Fair scheduler hits IllegalStateException trying to reserve + different apps on same node. (Sandy Ryza via tomwhite) + + YARN-272. Fair scheduler log messages try to print objects without + overridden toString methods. (sandyr via tucu) + + YARN-278. Fair scheduler maxRunningApps config causes no apps to make + progress. (sandyr via tucu) + + YARN-282. Fair scheduler web UI double counts Apps Submitted. + (sandyr via tucu) + + YARN-283. Fair scheduler fails to get queue info without root prefix. + (sandyr via tucu) + + YARN-192. Node update causes NPE in the fair scheduler. + (Sandy Ryza via tomwhite) + + YARN-288. Fair scheduler queue doesn't accept any jobs when ACLs are + configured. (Sandy Ryza via tomwhite) + + YARN-300. After YARN-271, fair scheduler can infinite loop and not + schedule any application. (Sandy Ryza via tomwhite) + + YARN-301. Fair scheduler throws ConcurrentModificationException when + iterating over app's priorities. (Sandy Ryza via tomwhite) + + YARN-217. Fix RMAdmin protocol description to make it work in secure mode + also. (Devaraj K via vinodkv) + + YARN-253. 
Fixed container-launch to not fail when there are no local + resources to localize. (Tom White via vinodkv) + + YARN-330. Fix flakey test: TestNodeManagerShutdown#testKillContainersOnShutdown. + (Sandy Ryza via hitesh) + + YARN-335. Fair scheduler doesn't check whether rack needs containers + before assigning to node. (Sandy Ryza via tomwhite) + + YARN-336. Fair scheduler FIFO scheduling within a queue only allows 1 + app at a time. (Sandy Ryza via tomwhite) + + YARN-135. Client tokens should be per app-attempt, and should be + unregistered on App-finish. (vinodkv via sseth) + + YARN-302. Fair scheduler assignmultiple should default to false. (sandyr via tucu) + + YARN-372. Move InlineDispatcher from hadoop-yarn-server-resourcemanager to + hadoop-yarn-common (sseth via hitesh) + + YARN-370. Fix SchedulerUtils to correctly round up the resource for + containers. (Zhijie Shen via acmurthy) + + YARN-355. Fixes a bug where RM app submission could jam under load. + (Daryn Sharp via sseth) + +Release 2.0.2-alpha - 2012-09-07 + + YARN-9. Rename YARN_HOME to HADOOP_YARN_HOME. (vinodkv via acmurthy) + + NEW FEATURES + + YARN-1. Promote YARN to be a sub-project of Apache Hadoop. (acmurthy) + + IMPROVEMENTS + + YARN-29. Add a yarn-client module. (Vinod Kumar Vavilapalli via sseth) + + YARN-10. Fix DistributedShell module to not have a dependency on + hadoop-mapreduce-client-core. (Hitesh Shah via vinodkv) + + YARN-80. Add support for delaying rack-local containers in + CapacityScheduler. (acmurthy) + + YARN-137. Change the default YARN scheduler to be the CapacityScheduler. + (sseth via acmurthy) + + OPTIMIZATIONS + + BUG FIXES + + YARN-13. Fix pom versions for YARN in branch-2 (todd) + + MAPREDUCE-2374. "Text File Busy" errors launching MR tasks. (Andy Isaacson + via atm) + + YARN-12. Fix findbugs warnings in FairScheduler. (Junping Du via acmurthy) + + YARN-22. Fix ContainerLogs to work if the log-dir is specified as a URI. + (Mayank Bansal via sseth) + + YARN-37. Change TestRMAppTransitions to use the DrainDispatcher. + (Mayank Bansal via sseth) + + YARN-79. Implement close on all clients to YARN so that RPC clients don't + throw exceptions on shut-down. (Vinod Kumar Vavilapalli) + + YARN-42. Modify NM's non-aggregating logs' handler to stop properly so that + NMs don't get NPEs on startup errors. (Devaraj K via vinodkv) + + YARN-15. Updated default classpath for YARN applications to reflect split of + YARN into a sub-project. (Arun C Murthy via vinodkv) + + YARN-75. Modified ResourceManager's RMContainer to handle a valid RELEASE + event at RUNNING state. (Siddharth Seth via vinodkv) + + YARN-138. Ensure default values for minimum/maximum container sizes is + sane. (harsh & sseth via acmurthy) + +Release 0.23.9 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 0.23.8 - 2013-06-05 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + YARN-548. Add tests for YarnUncaughtExceptionHandler (Vadim Bondarev via + jeagles) + + BUG FIXES + + YARN-363. Add webapps/proxy directory without which YARN proxy-server fails + when started in stand-alone mode. (Kenji Kikushima via vinodkv) + + YARN-690. RM exits on token cancel/renew problems (daryn via bobby) + +Release 0.23.7 - 2013-04-18 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + YARN-133 Update web services docs for RM clusterMetrics (Ravi Prakash via + kihwal) + + YARN-249. 
Capacity Scheduler web page should show list of active users per + queue like it used to (in 1.x) (Ravi Prakash via tgraves) + + YARN-236. RM should point tracking URL to RM web page when app fails to + start (Jason Lowe via jeagles) + + YARN-269. Resource Manager not logging the health_check_script result when + taking it out (Jason Lowe via kihwal) + + YARN-227. Application expiration difficult to debug for end-users + (Jason Lowe via jeagles) + + YARN-443. allow OS scheduling priority of NM to be different than the + containers it launches (tgraves) + + YARN-468. coverage fix for org.apache.hadoop.yarn.server.webproxy.amfilter + (Aleksey Gorshkov via bobby) + + YARN-200. yarn log does not output all needed information, and is in a + binary format (Ravi Prakash via jlowe) + + YARN-525. make CS node-locality-delay refreshable (Thomas Graves via jlowe) + + OPTIMIZATIONS + + YARN-357. App submission should not be synchronized (daryn) + + BUG FIXES + + YARN-343. Capacity Scheduler maximum-capacity value -1 is invalid (Xuan + Gong via tgraves) + + YARN-364. AggregatedLogDeletionService can take too long to delete logs + (jlowe) + + YARN-362. Unexpected extra results when using webUI table search (Ravi + Prakash via jlowe) + + YARN-400. RM can return null application resource usage report leading to + NPE in client (Jason Lowe via tgraves) + + YARN-426. Failure to download a public resource prevents further downloads + (Jason Lowe via bobby) + + YARN-448. Remove unnecessary hflush from log aggregation (Kihwal Lee via + bobby) + + YARN-345. Many InvalidStateTransitonException errors for ApplicationImpl + in Node Manager (Robert Parker via jlowe) + + YARN-109. .tmp file is not deleted for localized archives (Mayank Bansal + via bobby) + + YARN-460. CS user left in list of active users for the queue even when + application finished (tgraves) + +Release 0.23.6 - 2013-02-06 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + YARN-285. Added a temporary plugin interface for RM to be able to redirect + to JobHistory server for apps that it no longer tracks. (Derek Dagit via + vinodkv) + + OPTIMIZATIONS + + BUG FIXES + + YARN-188. Coverage fixing for CapacityScheduler (Aleksey Gorshkov via + bobby) + + YARN-214. RMContainerImpl does not handle event EXPIRE at state RUNNING + (jeagles via bobby) + + YARN-151. Browser thinks RM main page JS is taking too long + (Ravi Prakash via bobby) + + YARN-204. test coverage for org.apache.hadoop.tools (Aleksey Gorshkov via + bobby) + + YARN-251. Proxy URI generation fails for blank tracking URIs (Tom White + via jlowe) + + YARN-258. RM web page UI shows Invalid Date for start and finish times + (Ravi Prakash via jlowe) + + YARN-266. RM and JHS Web UIs are blank because AppsBlock is not escaping + string properly (Ravi Prakash via jlowe) + + YARN-280. RM does not reject app submission with invalid tokens + (Daryn Sharp via tgraves) + + YARN-225. Proxy Link in RM UI thows NPE in Secure mode + (Devaraj K via bobby) + + YARN-293. Node Manager leaks LocalizerRunner object for every Container + (Robert Joseph Evans via jlowe) + + YARN-50. Implement renewal / cancellation of Delegation Tokens + (Siddharth Seth via tgraves) + + YARN-320. RM should always be able to renew its own tokens. + (Daryn Sharp via sseth) + + YARN-325. RM CapacityScheduler can deadlock when getQueueInfo() is + called and a container is completing (Arun C Murthy via tgraves) + + YARN-334. Maven RAT plugin is not checking all source files (tgraves) + + YARN-354. 
WebAppProxyServer exits immediately after startup (Liang Xie via + jlowe) + +Release 0.23.5 - 2012-11-28 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + YARN-161. Fix multiple compiler warnings for unchecked operations in YARN + common. (Chris Nauroth via vinodkv) + + YARN-43. Fix TestResourceTrackerService to not depend on test order and thus + pass on JDK7. (Thomas Graves via vinodkv) + + YARN-32. Fix TestApplicationTokens to not depend on test order and thus pass + on JDK7. (vinodkv) + + YARN-186. Coverage fixing LinuxContainerExecutor (Aleksey Gorshkov via + bobby) + + YARN-216. Remove jquery theming support. (Robert Joseph Evans via jlowe) + + OPTIMIZATIONS + + BUG FIXES + + YARN-163. Retrieving container log via NM webapp can hang with multibyte + characters in log (jlowe via bobby) + + YARN-174. Modify NodeManager to pass the user's configuration even when + rebooting. (vinodkv) + + YARN-177. CapacityScheduler - adding a queue while the RM is running has + wacky results (acmurthy vai tgraves) + + YARN-178. Fix custom ProcessTree instance creation (Radim Kolar via bobby) + + YARN-180. Capacity scheduler - containers that get reserved create + container token to early (acmurthy and bobby) + + YARN-139. Interrupted Exception within AsyncDispatcher leads to user + confusion. (Vinod Kumar Vavilapalli via jlowe) + + YARN-165. RM should point tracking URL to RM web page for app when AM fails + (jlowe via bobby) + + YARN-159. RM web ui applications page should be sorted to display last app + first (tgraves via bobby) + + YARN-166. capacity scheduler doesn't allow capacity < 1.0 (tgraves via + bobby) + + YARN-189. Fixed a deadlock between RM's ApplicationMasterService and the + dispatcher. (Thomas Graves via vinodkv) + + YARN-202. Log Aggregation generates a storm of fsync() for namenode + (Kihwal Lee via bobby) + + YARN-201. Fix CapacityScheduler to be less conservative for starved + off-switch requests. (jlowe via acmurthy) + + YARN-206. TestApplicationCleanup.testContainerCleanup occasionally fails. + (jlowe via jeagles) + + YARN-212. NM state machine ignores an APPLICATION_CONTAINER_FINISHED event + when it shouldn't (Nathan Roberts via jlowe) + + YARN-219. NM should aggregate logs when application finishes. (bobby) + +Release 0.23.4 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + Change package of YarnClient to org.apache.hadoop. (Bikas Saha via vinodkv) + + YARN-108. FSDownload can create cache directories with the wrong + permissions (Jason Lowe via bobby) + + YARN-57. Allow process-tree based resource calculation et al. to be + pluggable to support it on multiple platforms. (Radim Kolar via acmurthy) + + OPTIMIZATIONS + + BUG FIXES + + YARN-88. DefaultContainerExecutor can fail to set proper permissions. + (Jason Lowe via sseth) + + YARN-106. Nodemanager needs to set permissions of local directories (jlowe + via bobby) + +Release 0.23.3 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMAZATIONS + + BUG FIXES + + YARN-14. Symlinks to peer distributed cache files no longer work + (Jason Lowe via bobby) + + YARN-25. remove old aggregated logs (Robert Evans via tgraves) + + YARN-27. Failed refreshQueues due to misconfiguration prevents further + refreshing of queues (Arun Murthy via tgraves) + + MAPREDUCE-4323. NM leaks filesystems (Jason Lowe via jeagles) + + YARN-39. RM-NM secret-keys should be randomly generated and rolled every + so often. (vinodkv and sseth via sseth) + + YARN-31. 
Fix TestDelegationTokenRenewer to not depend on test order so as to + pass tests on jdk7. (Thomas Graves via vinodkv) + + YARN-63. RMNodeImpl is missing valid transitions from the UNHEALTHY state + (Jason Lowe via bobby) + + YARN-60. Fixed a bug in ResourceManager which causes all NMs to get NPEs and + thus causes all containers to be rejected. (vinodkv) + + YARN-66. aggregated logs permissions not set properly (tgraves via bobby) + + YARN-68. NodeManager will refuse to shutdown indefinitely due to container + log aggregation (daryn via bobby) + + YARN-87. NM ResourceLocalizationService does not set permissions of local + cache directories (Jason Lowe via tgraves) diff --git a/aarch64/share/doc/hadoop/yarn/LICENSE.txt b/aarch64/share/doc/hadoop/yarn/LICENSE.txt new file mode 100644 index 0000000..59bcdbc --- /dev/null +++ b/aarch64/share/doc/hadoop/yarn/LICENSE.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ diff --git a/aarch64/share/doc/hadoop/yarn/NOTICE.txt b/aarch64/share/doc/hadoop/yarn/NOTICE.txt new file mode 100644 index 0000000..62fc581 --- /dev/null +++ b/aarch64/share/doc/hadoop/yarn/NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). 
diff --git a/aarch64/share/hadoop/common/hadoop-common-2.2.0-tests.jar b/aarch64/share/hadoop/common/hadoop-common-2.2.0-tests.jar new file mode 100644 index 0000000..efe3964 Binary files /dev/null and b/aarch64/share/hadoop/common/hadoop-common-2.2.0-tests.jar differ diff --git a/aarch64/share/hadoop/common/hadoop-common-2.2.0.jar b/aarch64/share/hadoop/common/hadoop-common-2.2.0.jar new file mode 100644 index 0000000..5fb45d8 Binary files /dev/null and b/aarch64/share/hadoop/common/hadoop-common-2.2.0.jar differ diff --git a/aarch64/share/hadoop/common/hadoop-nfs-2.2.0.jar b/aarch64/share/hadoop/common/hadoop-nfs-2.2.0.jar new file mode 100644 index 0000000..f68b5dd Binary files /dev/null and b/aarch64/share/hadoop/common/hadoop-nfs-2.2.0.jar differ diff --git a/aarch64/share/hadoop/common/jdiff/hadoop-core_0.20.0.xml b/aarch64/share/hadoop/common/jdiff/hadoop-core_0.20.0.xml new file mode 100644 index 0000000..82bba33 --- /dev/null +++ b/aarch64/share/hadoop/common/jdiff/hadoop-core_0.20.0.xml @@ -0,0 +1,32308 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param in InputStream to deserialize the object from.]]> + + + + + + + + + + + name property, null if + no such property exists. + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + name property, without doing + variable expansion. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + + value of the name property. + + @param name property name. + @param value property value.]]> + + + + + + + + + + + + + + name property. If no such property + exists, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, or if the specified value is not a valid + int, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as an int, + or defaultValue.]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property is specified, or if the specified value is not a valid + long, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property is specified, or if the specified value is not a valid + float, then defaultValue is returned. + + @param name property name. 
+ @param defaultValue default value. + @return property value as a float, + or defaultValue.]]> + + + + + + + name property to a float. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

+ This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + Resources + +

Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

    +
+   1. core-default.xml: Read-only defaults for hadoop.
+   2. core-site.xml: Site-specific configuration for a given hadoop
+      installation.
+
+ Applications may add additional resources, which are loaded
+ subsequent to these resources in the order they are added.

Final Parameters

+ +

Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

+  <property>
+    <name>dfs.client.buffer.dir</name>
+    <value>/tmp/hadoop/dfs/client</value>
+    <final>true</final>
+  </property>
+ + Administrators typically define parameters as final in + core-site.xml for values that user applications may not alter. + +

Variable Expansion

+ +

Value strings are first processed for variable expansion. The + available properties are:

    +
+   1. Other properties defined in this Configuration; and, if a name is
+      undefined here,
+   2. Properties in {@link System#getProperties()}.
+ +

For example, if a configuration resource contains the following property + definitions: +

+  <property>
+    <name>basedir</name>
+    <value>/user/${user.name}</value>
+  </property>
+  
+  <property>
+    <name>tempdir</name>
+    <value>${basedir}/tmp</value>
+  </property>
+ + When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name.]]> +
+
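
The variable-expansion behaviour described above can be exercised directly from Java. The following is an illustrative sketch only (it is not part of the imported tree); the basedir/tempdir properties mirror the example above, and the printed value depends on the local user.name system property.

    import org.apache.hadoop.conf.Configuration;

    public class ConfigurationExpansionSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Mirror the example properties from the javadoc above.
        conf.set("basedir", "/user/${user.name}");
        conf.set("tempdir", "${basedir}/tmp");

        // get() expands ${basedir} from this Configuration and ${user.name}
        // from the JVM system properties before returning the value.
        System.out.println(conf.get("tempdir"));    // e.g. /user/alice/tmp

        // getRaw() returns the stored value without expansion.
        System.out.println(conf.getRaw("tempdir")); // ${basedir}/tmp
      }
    }
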
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DistributedCache is a facility provided by the Map-Reduce + framework to cache files (text, archives, jars etc.) needed by applications. +

+ +

Applications specify the files, via urls (hdfs:// or http://) to be cached + via the {@link org.apache.hadoop.mapred.JobConf}. + The DistributedCache assumes that the + files specified via hdfs:// urls are already present on the + {@link FileSystem} at the path specified by the url.

+ +

The framework will copy the necessary files on to the slave node before + any tasks for the job are executed on that node. Its efficiency stems from + the fact that the files are only copied once per job and the ability to + cache archives which are un-archived on the slaves.

+ +

DistributedCache can be used to distribute simple, read-only + data/text files and/or more complex types such as archives, jars etc. + Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. + Jars may be optionally added to the classpath of the tasks, a rudimentary + software distribution mechanism. Files have execution permissions. + Optionally users can also direct it to symlink the distributed cache file(s) + into the working directory of the task.

+ +

DistributedCache tracks modification timestamps of the cache + files. Clearly the cache files should not be modified by the application + or externally while the job is executing.

+ +

Here is an illustrative example on how to use the + DistributedCache:

+

+     // Setting up the cache for the application
+     
+     1. Copy the requisite files to the FileSystem:
+     
+     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
+     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
+     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
+     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
+     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
+     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
+     
+     2. Setup the application's JobConf:
+     
+     JobConf job = new JobConf();
+     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
+                                   job);
+     DistributedCache.addCacheArchive(new URI("/myapp/map.zip", job);
+     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar", job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz", job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz", job);
+     
+     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
+     or {@link org.apache.hadoop.mapred.Reducer}:
+     
+     public static class MapClass extends MapReduceBase  
+     implements Mapper<K, V, K, V> {
+     
+       private Path[] localArchives;
+       private Path[] localFiles;
+       
+       public void configure(JobConf job) {
+         // Get the cached archives/files
+         localArchives = DistributedCache.getLocalCacheArchives(job);
+         localFiles = DistributedCache.getLocalCacheFiles(job);
+       }
+       
+       public void map(K key, V value, 
+                       OutputCollector<K, V> output, Reporter reporter) 
+       throws IOException {
+         // Use data from the cached archives/files here
+         // ...
+         // ...
+         output.collect(k, v);
+       }
+     }
+     
+ 

+ + @see org.apache.hadoop.mapred.JobConf + @see org.apache.hadoop.mapred.JobClient]]> +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BufferedFSInputStream + with the specified buffer size, + and saves its argument, the input stream + in, for later use. An internal + buffer array of length size + is created and stored in buf. + + @param in the underlying input stream. + @param size the buffer size. + @exception IllegalArgumentException if size <= 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

+ A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    ?
        Matches any single character.

    *
        Matches zero or more characters.

    [abc]
        Matches a single character from character set {a,b,c}.

    [a-b]
        Matches a single character from the character range {a...b}. Note that
        character a must be lexicographically less than or equal to character b.

    [^a]
        Matches a single character that is not from character set or range {a}.
        Note that the ^ character must occur immediately to the right of the
        opening bracket.

    \c
        Removes (escapes) any special meaning of character c.

    {ab,cd}
        Matches a string from the string set {ab, cd}.

    {ab,c{de,fh}}
        Matches a string from the string set {ab, cde, cfh}.
+ @param pathPattern a regular expression specifying a path pattern + @return an array of paths that match the path pattern + @throws IOException]]> +
+
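For illustration, a minimal sketch of using this pattern syntax through FileSystem#globStatus (the path is hypothetical and IOException handling is omitted):

+     FileSystem fs = FileSystem.get(new Configuration());
+     // Matches e.g. /logs/app-2014-01-01.log through /logs/app-2014-06-30.log
+     FileStatus[] matches = fs.globStatus(new Path("/logs/app-2014-0[1-6]-*.log"));
+     for (FileStatus match : matches) {
+       System.out.println(match.getPath());
+     }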
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object. The + Hadoop DFS is a multi-machine system that appears as a single + disk. It's useful because of its fault tolerance and potentially + very large capacity. + +

+ The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + buf at offset + and checksum into checksum. + The method is used for implementing read, therefore, it should be optimized + for sequential reading + @param pos chunkPos + @param buf desitination buffer + @param offset offset in buf at which to store data + @param len maximun number of bytes to read + @return number of bytes read]]> + + + + + + + + + + + + + + + + + -1 if the end of the + stream is reached. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + This method implements the general contract of the corresponding + {@link InputStream#read(byte[], int, int) read} method of + the {@link InputStream} class. As an additional + convenience, it attempts to read as many bytes as possible by repeatedly + invoking the read method of the underlying stream. This + iterated read continues until one of the following + conditions becomes true:

    + +
  • The specified number of bytes have been read, + +
  • The read method of the underlying stream returns + -1, indicating end-of-file. + +
If the first read on the underlying stream returns + -1 to indicate end-of-file then this method returns + -1. Otherwise this method returns the number of bytes + actually read. + + @param b destination buffer. + @param off offset at which to start storing bytes. + @param len maximum number of bytes to read. + @return the number of bytes read, or -1 if the end of + the stream has been reached. + @exception IOException if an I/O error occurs. + ChecksumException if any checksum error occurs]]> +
+ + + + + + + + + + + + + + + + + + n bytes of data from the + input stream. + +

This method may skip more bytes than are remaining in the backing + file. This produces no exception and the number of bytes skipped + may include some number of bytes that were beyond the EOF of the + backing file. Attempting to read from the stream after skipping past + the end will result in -1 indicating the end of the file. + +

If n is negative, no bytes are skipped. + + @param n the number of bytes to be skipped. + @return the actual number of bytes skipped. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to skip to is corrupted]]> + + + + + + + This method may seek past the end of the file. + This produces no exception and an attempt to read from + the stream will result in -1 indicating the end of the file. + + @param pos the postion to seek to. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to seek to is corrupted]]> + + + + + + + + + + len bytes from + stm + + @param stm an input stream + @param buf destiniation buffer + @param offset offset at which to store data + @param len number of bytes to read + @return actual number of bytes read + @throws IOException if there is any IO error]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + len bytes from the specified byte array + starting at offset off and generate a checksum for + each data chunk. + +

This method stores bytes from the given array into this + stream's buffer before it gets checksumed. The buffer gets checksumed + and flushed to the underlying output stream when all data + in a checksum chunk are in the buffer. If the buffer is empty and + requested length is at least as large as the size of next checksum chunk + size, this method will checksum and write the chunk directly + to the underlying output stream. Thus it avoids uneccessary data copy. + + @param b the data. + @param off the start offset in the data. + @param len the number of bytes to write. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trash feature. Files are moved to a user's trash + directory, a subdirectory of their home directory named ".Trash". Files are + initially moved to a current sub-directory of the trash directory. + Within that sub-directory their original path is preserved. Periodically + one may checkpoint the current trash and remove older checkpoints. (This + design permits trash management without enumeration of the full trash + content, without date support in the filesystem, and without clock + synchronization.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A client for the Kosmos filesystem (KFS) + +

Introduction

+ This page describes how to use the Kosmos Filesystem (KFS) as a backing store with Hadoop. This page assumes that you have downloaded the KFS software and installed the necessary binaries as outlined in the KFS documentation.

Steps

+ +
    +
  • In the Hadoop conf directory edit core-site.xml, + add the following: +
    +<property>
    +  <name>fs.kfs.impl</name>
    +  <value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
    +  <description>The FileSystem for kfs: uris.</description>
    +</property>
    +            
    + +
  • In the Hadoop conf directory edit core-site.xml, + adding the following (with appropriate values for + <server> and <port>): +
    +<property>
    +  <name>fs.default.name</name>
    +  <value>kfs://<server:port></value> 
    +</property>
    +
    +<property>
    +  <name>fs.kfs.metaServerHost</name>
    +  <value><server></value>
    +  <description>The location of the KFS meta server.</description>
    +</property>
    +
    +<property>
    +  <name>fs.kfs.metaServerPort</name>
    +  <value><port></value>
    +  <description>The location of the meta server's port.</description>
    +</property>
    +
    +
    +
  • + +
  • Copy KFS's kfs-0.1.jar to Hadoop's lib directory. This step enables Hadoop to load the KFS-specific modules. Note that kfs-0.1.jar was built when you compiled the KFS source code. This jar file contains code that calls KFS's client library code via JNI; the native code is in KFS's libkfsClient.so library.
  • + +
  • When the Hadoop map/reduce trackers start up, those processes (on local as well as remote nodes) will need to load KFS's libkfsClient.so library. To simplify this process, it is advisable to store libkfsClient.so in an NFS-accessible directory (similar to where the Hadoop binaries/scripts are stored); then modify Hadoop's conf/hadoop-env.sh, adding the following line and providing a suitable value for <path>:
    +export LD_LIBRARY_PATH=<path>
    +
    + + +
  • Start only the map/reduce trackers +
    + example: execute Hadoop's bin/start-mapred.sh
  • +
+
+ +If the map/reduce job trackers start up, all file-I/O is done to KFS.]]> +
+
+ + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a tool for migrating data from an older to a newer version + of an S3 filesystem. +

+

+ All files in the filesystem are migrated by re-writing the block metadata + - no datafiles are touched. +

]]> +
+
+ + + + + + + + + + + + + + + + + + + Extracts AWS credentials from the filesystem URI or configuration. +

]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A block-based {@link FileSystem} backed by + Amazon S3. +

+ @see NativeS3FileSystem]]> +
+
+ + + + + + + + + + + + + + + + + + + + + A distributed, block-based implementation of {@link +org.apache.hadoop.fs.FileSystem} that uses Amazon S3 +as a backing store.

+ +

+Files are stored in S3 as blocks (represented by +{@link org.apache.hadoop.fs.s3.Block}), which have an ID and a length. +Block metadata is stored in S3 as a small record (represented by +{@link org.apache.hadoop.fs.s3.INode}) using the URL-encoded +path string as a key. Inodes record the file type (regular file or directory) and the list of blocks. +This design makes it easy to seek to any given position in a file by reading the inode data to compute +which block to access, then using S3's support for +HTTP Range headers +to start streaming from the correct position. +Renames are also efficient since only the inode is moved (by a DELETE followed by a PUT since +S3 does not support renames). +

+

+For a single file /dir1/file1 which takes two blocks of storage, the file structure in S3 +would be something like this: +

+
+/
+/dir1
+/dir1/file1
+block-6415776850131549260
+block-3026438247347758425
+
+

+Inodes start with a leading /, while blocks are prefixed with block-. +

]]> +
+
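For illustration, a minimal sketch of reaching this block-based store through the ordinary FileSystem API (the bucket name and credentials are placeholders; this assumes the usual fs.s3.* credential properties and omits IOException handling):

+     Configuration conf = new Configuration();
+     conf.set("fs.s3.awsAccessKeyId", "YOUR_ACCESS_KEY");
+     conf.set("fs.s3.awsSecretAccessKey", "YOUR_SECRET_KEY");
+     // An s3:// URI selects the block-based S3 filesystem implementation.
+     FileSystem s3fs = FileSystem.get(URI.create("s3://my-bucket/"), conf);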
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If f is a file, this method will make a single call to S3. + If f is a directory, this method will make a maximum of + (n / 1000) + 2 calls to S3, where n is the total number of + files and directories contained directly in f. +

]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} for reading and writing files stored on + Amazon S3. + Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation + stores files on S3 in their + native form so they can be read by other S3 tools. +

+ @see org.apache.hadoop.fs.s3.S3FileSystem]]> +
+
+ + + +A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem} for reading and writing files on +Amazon S3. +Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem}, which is block-based, +this implementation stores +files on S3 in their native form for interoperability with other S3 tools. +

]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . + @param name The name of the server + @param port The port to use on the server + @param findPort whether the server should start at the given port and + increment by 1 until it finds a free port. + @param conf Configuration]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + points to the log directory + "/static/" -> points to common static files (src/webapps/static) + "/" -> the jsp server code from (src/webapps/)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nth value.]]> + + + + + + + + + + + + + + + + + + + + + nth value in the file.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataInputStream and + ByteArrayInputStream each time data is read. + +

Typical usage is something like the following:

+
+ DataInputBuffer buffer = new DataInputBuffer();
+ while (... loop condition ...) {
+   byte[] data = ... get data ...;
+   int dataLength = ... get data length ...;
+   buffer.reset(data, dataLength);
+   ... read buffer using DataInput methods ...
+ }
+ 
]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataOutputStream and + ByteArrayOutputStream each time data is written. + +

Typical usage is something like the following:

+
+ DataOutputBuffer buffer = new DataOutputBuffer();
+ while (... loop condition ...) {
+   buffer.reset();
+   ... write buffer using DataOutput methods ...
+   byte[] data = buffer.getData();
+   int dataLength = buffer.getLength();
+   ... write data to its ultimate destination ...
+ }
+ 
]]> +
+
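A concrete round-trip sketch combining the two buffers above (the values are arbitrary and IOException handling is omitted):

+     DataOutputBuffer out = new DataOutputBuffer();
+     out.writeInt(42);                            // write some data
+     out.writeUTF("hello");
+
+     DataInputBuffer in = new DataInputBuffer();
+     in.reset(out.getData(), out.getLength());    // point the input buffer at the written bytes
+     int number = in.readInt();                   // 42
+     String text = in.readUTF();                  // "hello"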
+ + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

+ +

+ Compared with ObjectWritable, this class is much more efficient, because ObjectWritable will append the class declaration as a String into the output file in every Key-Value pair.

+ +

+ Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

+ How to use it:
+ 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines the classes that will be wrapped in GenericObject in the application. Note that the classes returned by getTypes() must implement the Writable interface.

+ + The code looks like this: +
+ public class GenericObject extends GenericWritable {
+ 
+   private static Class[] CLASSES = {
+               ClassType1.class, 
+               ClassType2.class,
+               ClassType3.class,
+               };
+
+   protected Class[] getTypes() {
+       return CLASSES;
+   }
+
+ }
+ 
+ + @since Nov 8, 2006]]> +
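A usage sketch for the GenericObject class above (ClassType1 is the hypothetical Writable from the example):

+     GenericObject generic = new GenericObject();
+     generic.set(new ClassType1());                      // wrap a concrete Writable before serialization
+     ClassType1 unwrapped = (ClassType1) generic.get();  // unwrap after readFields()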
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new InputStream and + ByteArrayInputStream each time data is read. + +

Typical usage is something like the following:

+
+ InputBuffer buffer = new InputBuffer();
+ while (... loop condition ...) {
+   byte[] data = ... get data ...;
+   int dataLength = ... get data length ...;
+   buffer.reset(data, dataLength);
+   ... read buffer using InputStream methods ...
+ }
+ 
+ @see DataInputBuffer + @see DataOutput]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link IOException} or + null pointers. Must only be used for cleanup in exception handlers. + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key and + val. Returns true if such a pair exists and false when at + the end of the map]]> + + + + + + + + + + + + + + + + key or if it does not exist, at the first entry + after the named key. + +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof.]]> + + + + + + + + + key does not exist, return + the first entry that falls just before the key. Otherwise, + return the record that sorts just after. + @return - the key that was the closest match or null if eof.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new OutputStream and + ByteArrayOutputStream each time data is written. + +

Typical usage is something like the following:

+
+ OutputBuffer buffer = new OutputBuffer();
+ while (... loop condition ...) {
+   buffer.reset();
+   ... write buffer using OutputStream methods ...
+   byte[] data = buffer.getData();
+   int dataLength = buffer.getLength();
+   ... write data to its ultimate destination ...
+ }
+ 
+ @see DataOutputBuffer + @see InputBuffer]]> +
+
+ + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

+ @param + @see DeserializerComparator]]> +
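As an alternative to deserializer-based comparison, a raw comparator can be written against the serialized bytes directly. A minimal sketch for IntWritable keys, extending WritableComparator (which already implements this interface) and assuming IntWritable's 4-byte encoding:

+     public class IntRawComparator extends WritableComparator {
+       public IntRawComparator() {
+         super(IntWritable.class);
+       }
+       // Compare two serialized IntWritables without deserializing them.
+       public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+         int left = readInt(b1, s1);
+         int right = readInt(b2, s2);
+         return (left < right) ? -1 : (left == right ? 0 : 1);
+       }
+     }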
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

SequenceFile provides {@link Writer}, {@link Reader} and + {@link Sorter} classes for writing, reading and sorting respectively.

+ + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
    +
  1. Writer : Uncompressed records.
  2. RecordCompressWriter : Record-compressed files, only compress values.
  3. BlockCompressWriter : Block-compressed files, both keys & values are collected in 'blocks' separately and compressed. The size of the 'block' is configurable.
+ +

The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

+ +

The recommended way is to use the static createWriter methods provided by SequenceFile to choose the preferred format.

+ +

The {@link Reader} acts as the bridge and can read any of the above + SequenceFile formats.

+ +

SequenceFile Formats

+ +

Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

+
    +
  • version - 3 bytes of magic header SEQ, followed by 1 byte of actual version number (e.g. SEQ4 or SEQ6)
  • keyClassName - key class
  • valueClassName - value class
  • compression - A boolean which specifies if compression is turned on for keys/values in this file.
  • blockCompression - A boolean which specifies if block-compression is turned on for keys/values in this file.
  • compression codec - CompressionCodec class which is used for compression of keys and/or values (if compression is enabled).
  • metadata - {@link Metadata} for this file.
  • sync - A sync marker to denote end of the header.
+ +
Uncompressed SequenceFile Format
+
    +
  • Header
  • Record
    • Record length
    • Key length
    • Key
    • Value
  • A sync-marker every few 100 bytes or so.
+ +
Record-Compressed SequenceFile Format
+
    +
  • Header
  • Record
    • Record length
    • Key length
    • Key
    • Compressed Value
  • A sync-marker every few 100 bytes or so.
+ +
Block-Compressed SequenceFile Format
+
    +
  • Header
  • Record Block
    • Compressed key-lengths block-size
    • Compressed key-lengths block
    • Compressed keys block-size
    • Compressed keys block
    • Compressed value-lengths block-size
    • Compressed value-lengths block
    • Compressed values block-size
    • Compressed values block
  • A sync-marker every few 100 bytes or so.
+ +

The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

+ + @see CompressionCodec]]> +
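A minimal sketch of writing and re-reading a SequenceFile (the path and key/value types are arbitrary; this uses the classic FileSystem-based factory methods and omits IOException handling):

+     Configuration conf = new Configuration();
+     FileSystem fs = FileSystem.get(conf);
+     Path file = new Path("/tmp/example.seq");
+
+     SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file,
+         Text.class, IntWritable.class);
+     writer.append(new Text("answer"), new IntWritable(42));
+     writer.close();
+
+     SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
+     Text key = new Text();
+     IntWritable value = new IntWritable();
+     while (reader.next(key, value)) {      // iterate over all key/value pairs
+       System.out.println(key + "\t" + value);
+     }
+     reader.close();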
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key, skipping its + value. True if another entry exists, and false at end of file.]]> + + + + + + + + key and + val. Returns true if such a pair exists and false when at + end of file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The position passed must be a position returned by {@link + SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + position, use {@link SequenceFile.Reader#sync(long)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SegmentDescriptor + @param segments the list of SegmentDescriptors + @param tmpDir the directory to write temporary files into + @return RawKeyValueIterator + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For best performance, applications should make sure that the {@link + Writable#readFields(DataInput)} implementation of their keys is + very efficient. In particular, it should avoid allocating memory.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This always returns a synchronized position. In other words, + immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + the key may be earlier in the file than key last written when this + method was called (e.g., with block-compression, it may be the first key + in the block that was being written when this method was called).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key. Returns + true if such a key exists and false when at the end of the set.]]> + + + + + + + key. + Returns key, or null if no match exists.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instatiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. 
+ @return byte position of the first occurrence of the search string in the UTF-8 buffer or -1 if not found]]> +

Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a UTF8 with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + Also includes utilities for efficiently reading and writing UTF-8. + + @deprecated replaced by Text]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

+ @param in DataInput to deserialize this object from. + @throws IOException]]> +
+ + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

+ +

Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

+ +

Example:

+

+     public class MyWritable implements Writable {
+       // Some data     
+       private int counter;
+       private long timestamp;
+       
+       public void write(DataOutput out) throws IOException {
+         out.writeInt(counter);
+         out.writeLong(timestamp);
+       }
+       
+       public void readFields(DataInput in) throws IOException {
+         counter = in.readInt();
+         timestamp = in.readLong();
+       }
+       
+       public static MyWritable read(DataInput in) throws IOException {
+         MyWritable w = new MyWritable();
+         w.readFields(in);
+         return w;
+       }
+     }
+ 

]]> +
+ + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

+ +

Example:

+

+     public class MyWritableComparable implements WritableComparable {
+       // Some data
+       private int counter;
+       private long timestamp;
+       
+       public void write(DataOutput out) throws IOException {
+         out.writeInt(counter);
+         out.writeLong(timestamp);
+       }
+       
+       public void readFields(DataInput in) throws IOException {
+         counter = in.readInt();
+         timestamp = in.readLong();
+       }
+       
+       public int compareTo(MyWritableComparable w) {
+         int thisValue = this.counter;
+         int thatValue = w.counter;
+         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
+       }
+     }
+ 

]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implemenation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. 
+ @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FIXME: This array should be in a private or package private location, + since it could be modified by malicious code. +

]]> +
+ + + + This interface is public for historical purposes. You should have no need to + use it. +

]]> +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Although BZip2 headers are marked with the magic "Bz" this + constructor expects the next byte in the stream to be the first one after + the magic. Thus callers have to skip the first two bytes. Otherwise this + constructor will throw an exception. +

+ + @throws IOException + if the stream content is malformed or an I/O error occurs. + @throws NullPointerException + if in == null]]> +
+
+ + + + + + + + + + + + + + + The decompression requires large amounts of memory. Thus you should call the + {@link #close() close()} method as soon as possible, to force + CBZip2InputStream to release the allocated memory. See + {@link CBZip2OutputStream CBZip2OutputStream} for information about memory + usage. +

+ +

+ CBZip2InputStream reads bytes from the compressed source stream via + the single byte {@link java.io.InputStream#read() read()} method exclusively. + Thus you should consider to use a buffered source stream. +

+ +

+ Instances of this class are not threadsafe. +

]]> +
+
+ + + + + + + + CBZip2OutputStream with a blocksize of 900k. + +

+ Attention: The caller is resonsible to write the two BZip2 magic + bytes "BZ" to the specified stream prior to calling this + constructor. +

+ + @param out * + the destination stream. + + @throws IOException + if an I/O error occurs in the specified stream. + @throws NullPointerException + if out == null.]]> +
+
+ + + + CBZip2OutputStream with specified blocksize. + +

+ Attention: The caller is resonsible to write the two BZip2 magic + bytes "BZ" to the specified stream prior to calling this + constructor. +

+ + + @param out + the destination stream. + @param blockSize + the blockSize as 100k units. + + @throws IOException + if an I/O error occurs in the specified stream. + @throws IllegalArgumentException + if (blockSize < 1) || (blockSize > 9). + @throws NullPointerException + if out == null. + + @see #MIN_BLOCKSIZE + @see #MAX_BLOCKSIZE]]> +
+
+ + + + + + + + + + + + + inputLength this method returns MAX_BLOCKSIZE + always. + + @param inputLength + The length of the data which will be compressed by + CBZip2OutputStream.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + == 1.]]> + + + + + == 9.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If you are ever unlucky/improbable enough to get a stack overflow whilst + sorting, increase the following constant and try again. In practice I + have never seen the stack go above 27 elems, so the following limit seems + very generous. +

]]> +
+
+ + + The compression requires large amounts of memory. Thus you should call the + {@link #close() close()} method as soon as possible, to force + CBZip2OutputStream to release the allocated memory. +

+ +

+ You can shrink the amount of allocated memory and maybe raise the compression + speed by choosing a lower blocksize, which in turn may cause a lower + compression ratio. You can avoid unnecessary memory allocation by avoiding + using a blocksize which is bigger than the size of the input. +

+ +

+ You can compute the memory usage for compressing by the following formula: +

+ +
+ <code>400k + (9 * blocksize)</code>.
+ 
+ +

+ To get the memory required for decompression by {@link CBZip2InputStream + CBZip2InputStream} use +

+ +
+ <code>65k + (5 * blocksize)</code>.
+ 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Memory usage by blocksize:

  Blocksize   Compression memory usage   Decompression memory usage
  100k        1300k                      565k
  200k        2200k                      1065k
  300k        3100k                      1565k
  400k        4000k                      2065k
  500k        4900k                      2565k
  600k        5800k                      3065k
  700k        6700k                      3565k
  800k        7600k                      4065k
  900k        8500k                      4565k
+ +

+ For decompression CBZip2InputStream allocates less memory if the + bzipped input is smaller than one block. +

+ +

+ Instances of this class are not threadsafe. +

+ +

+ TODO: Update to BZip2 1.0.1 +

]]> +
+
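In practice the bzip2 support is usually used through the codec API rather than by instantiating the CBZip2 classes directly; a minimal sketch (hypothetical output path, exception handling omitted):

+     Configuration conf = new Configuration();
+     BZip2Codec codec = ReflectionUtils.newInstance(BZip2Codec.class, conf);
+     FileSystem fs = FileSystem.get(conf);
+     CompressionOutputStream out =
+         codec.createOutputStream(fs.create(new Path("/tmp/data.bz2")));
+     out.write("hello, bzip2".getBytes());
+     out.finish();    // flush any remaining compressed data
+     out.close();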
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-zlib is loaded & initialized + and can be loaded for this job, else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Keep trying a limited number of times, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

]]> +
+
+ + + + + + + Keep trying for a maximum time, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

]]> +
+
+ Keep trying a limited number of times, waiting a growing amount of time between attempts, and then fail by re-throwing the exception. The time between attempts is sleepTime multiplied by the number of tries so far.

]]> +
+
+ Keep trying a limited number of times, waiting a growing amount of time between attempts, and then fail by re-throwing the exception. The time between attempts is sleepTime multiplied by a random number in the range [0, 2 to the number of retries).

]]> +
+
+ + + + + + Set a default policy with some explicit handlers for specific exceptions. +

]]> +
+
+ + + + + + A retry policy for RemoteException + Set a default policy with some explicit handlers for specific exceptions. +

]]> +
+
+ + + + Try once, and fail by re-throwing the exception. + This corresponds to having no retry mechanism in place. +

]]> +
+
+ + + + Try once, and fail silently for void methods, or by + re-throwing the exception for non-void methods. +

]]> +
+
+ + + + Keep trying forever. +

]]> +
+
+ + + A collection of useful implementations of {@link RetryPolicy}. +

]]> +
+
+ + + + + + + + + + Determines whether the framework should retry a + method for the given exception, and the number + of retries that have been made for that operation + so far. +

+ @param e The exception that caused the method to fail. + @param retries The number of times the method has been retried. + @return true if the method should be retried, + false if the method should not be retried + but shouldn't fail with an exception (only for void methods). + @throws Exception The re-thrown exception e indicating + that the method failed and should not be retried further.]]> +
+
+ + + Specifies a policy for retrying method failures. + Implementations of this interface should be immutable. +

]]> +
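A minimal sketch of a custom policy matching the shouldRetry contract documented above (retry up to three times, then give up by re-throwing):

+     class RetryThreeTimes implements RetryPolicy {
+       public boolean shouldRetry(Exception e, int retries) throws Exception {
+         if (retries < 3) {
+           return true;   // ask the framework to retry the failed method
+         }
+         throw e;         // give up: re-throw so the caller sees the failure
+       }
+     }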
+
+ + + + + + + + + + + + Create a proxy for an interface of an implementation class + using the same retry policy for each method in the interface. +

+ @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param retryPolicy the policy for retrying method call failures + @return the retry proxy]]> +
+
+ Create a proxy for an interface of an implementation class using a set of retry policies specified by method name. If no retry policy is defined for a method then a default of {@link RetryPolicies#TRY_ONCE_THEN_FAIL} is used.

+ @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param methodNameToPolicyMap a map of method names to retry policies + @return the retry proxy]]> +
+
+ + + A factory for creating retry proxies. +

]]> +
+
+ + + +A mechanism for selectively retrying methods that throw exceptions under certain circumstances. +

+ +

+Typical usage is +

+ +
+UnreliableImplementation unreliableImpl = new UnreliableImplementation();
+UnreliableInterface unreliable = (UnreliableInterface)
+  RetryProxy.create(UnreliableInterface.class, unreliableImpl,
+    RetryPolicies.retryUpToMaximumCountWithFixedSleep(4, 10, TimeUnit.SECONDS));
+unreliable.call();
+
+ +

+This will retry any method called on unreliable four times - in this case the call() +method - sleeping 10 seconds between +each retry. There are a number of {@link org.apache.hadoop.io.retry.RetryPolicies retry policies} +available, or you can implement a custom one by implementing {@link org.apache.hadoop.io.retry.RetryPolicy}. +It is also possible to specify retry policies on a +{@link org.apache.hadoop.io.retry.RetryProxy#create(Class, Object, Map) per-method basis}. +

]]> +
+
+ + + + + + + + Prepare the deserializer for reading.

]]> +
+
+ + + + + + Deserialize the next object from the underlying input stream. + If the object t is non-null then this deserializer + may set its internal state to the next object read from the input + stream. Otherwise, if the object t is null a new + deserialized object will be created. +

+ @return the deserialized object]]> +
+
+ + + + Close the underlying input stream and clear up any resources.

]]> +
+
+ Provides a facility for deserializing objects of type T from an {@link InputStream}.

+ +

+ Deserializers are stateful, but must not buffer the input since + other producers may read from the input between calls to + {@link #deserialize(Object)}. +

+ @param ]]> +
+
+ + + + + + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link Deserializer} to deserialize + the objects to be compared so that the standard {@link Comparator} can + be used to compare them. +

+

+ One may optimize compare-intensive operations by using a custom + implementation of {@link RawComparator} that operates directly + on byte representations. +

+ @param ]]> +
+
+ + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

+ @see JavaSerializationComparator]]> +
+
+ + + + + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

+ @param + @see JavaSerialization]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + Encapsulates a {@link Serializer}/{@link Deserializer} pair. +

+ @param ]]> +
+
+ + + + + + + Serializations are found by reading the io.serializations + property from conf, which is a comma-delimited list of + classnames. +

]]> +
+
+ + + + + + + + + + + + A factory for {@link Serialization}s. +

]]> +
+
+ + + + + + + + Prepare the serializer for writing.

]]> +
+
+ + + + + Serialize t to the underlying output stream.

]]> +
+
+ + + + Close the underlying output stream and clear up any resources.

]]> +
+
+ Provides a facility for serializing objects of type T to an {@link OutputStream}.

+ +

+ Serializers are stateful, but must not buffer the output since + other producers may write to the output between calls to + {@link #serialize(Object)}. +

+ @param ]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + +This package provides a mechanism for using different serialization frameworks +in Hadoop. The property "io.serializations" defines a list of +{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create +{@link org.apache.hadoop.io.serializer.Serializer}s and +{@link org.apache.hadoop.io.serializer.Deserializer}s. +

+ +

+To add a new serialization framework write an implementation of +{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the +"io.serializations" property. +

]]> +
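A minimal sketch of obtaining a Serializer through the frameworks listed in "io.serializations" (here writing a Text value to System.out; IOException handling omitted):

+     Configuration conf = new Configuration();
+     SerializationFactory factory = new SerializationFactory(conf);
+     Serializer<Text> serializer = factory.getSerializer(Text.class);
+     serializer.open(System.out);
+     serializer.serialize(new Text("hello"));
+     serializer.close();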
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + param, to the IPC server running at + address, returning the value. Throws exceptions if there are + network problems or if the remote code threw an exception. + @deprecated Use {@link #call(Writable, InetSocketAddress, Class, UserGroupInformation)} instead]]> + + + + + + + + + + param, to the IPC server running at + address with the ticket credentials, returning + the value. + Throws exceptions if there are network problems or if the remote code + threw an exception. + @deprecated Use {@link #call(Writable, InetSocketAddress, Class, UserGroupInformation)} instead]]> + + + + + + + + + + + param, to the IPC server running at + address which is servicing the protocol protocol, + with the ticket credentials, returning the value. + Throws exceptions if there are network problems or if the remote code + threw an exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unwraps any IOException. + + @param lookupTypes the desired exception class. + @return IOException, which is either the lookupClass exception or this.]]> + + + + + This unwraps any Throwable that has a constructor taking + a String as a parameter. + Otherwise it returns this. + + @return Throwable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + protocol is a Java interface. All parameters and return types must + be one of: + +
  • a primitive type, boolean, byte, + char, short, int, long, + float, double, or void; or
  • + +
  • a {@link String}; or
  • + +
  • a {@link Writable}; or
  • + +
  • an array of the above types
+ + All methods in the protocol should throw only IOException. No field data of + the protocol instance is transmitted.]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + handlerCount determines + the number of handler threads that will be used to process calls.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + ,name=RpcActivityForPort" + + Many of the activity metrics are sampled and averaged on an interval + which can be specified in the metrics config file. +

+ For the metrics that are sampled and averaged, one must specify
+ a metrics context that does periodic update calls. Most metrics contexts do.
+ The default Null metrics context, however, does NOT. So if you aren't
+ using any other metrics context, you can turn on the viewing and averaging
+ of sampled metrics by specifying the following two lines
+ in the hadoop-metrics.properties file:
+

+        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
+        rpc.period=10
+  
+

+ Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically + + + + Impl details: We use a dynamic mbean that gets the list of the metrics + from the metrics registry passed as an argument to the constructor]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

{@link #rpcQueueTime}.inc(time)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

+        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
+        rpc.period=10
+  
+

+ Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When constructing the instance, if the factory property + contextName.class exists, + its value is taken to be the name of the class to instantiate. Otherwise, + the default is to create an instance of + org.apache.hadoop.metrics.spi.NullContext, which is a + dummy "no-op" context which will cause all metric data to be discarded. + + @param contextName the name of the context + @return the named MetricsContext]]> + + + + + + + + + + + + + + + + + + + + + When the instance is constructed, this method checks if the file + hadoop-metrics.properties exists on the class path. If it + exists, it must be in the format defined by java.util.Properties, and all + the properties in the file are set as attributes on the newly created + ContextFactory instance. + + @return the singleton ContextFactory instance]]> + + + + getFactory() method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + startMonitoring() again after calling + this. + @see #close()]]> + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A record name identifies the kind of data to be reported. For example, a + program reporting statistics relating to the disks on a computer might use + a record name "diskStats".

+ + A record has zero or more tags. A tag has a name and a value. To + continue the example, the "diskStats" record might use a tag named + "diskName" to identify a particular disk. Sometimes it is useful to have + more than one tag, so there might also be a "diskType" with value "ide" or + "scsi" or whatever.

+ + A record also has zero or more metrics. These are the named + values that are to be reported to the metrics system. In the "diskStats" + example, possible metric names would be "diskPercentFull", "diskPercentBusy", + "kbReadPerSecond", etc.

+ + The general procedure for using a MetricsRecord is to fill in its tag and + metric values, and then call update() to pass the record to the + client library. + Metric data is not immediately sent to the metrics system + each time that update() is called. + An internal table is maintained, identified by the record name. This + table has columns + corresponding to the tag and the metric names, and rows + corresponding to each unique set of tag values. An update + either modifies an existing row in the table, or adds a new row with a set of + tag values that are different from all the other rows. Note that if there + are no tags, then there can be at most one row in the table.

+ + Once a row is added to the table, its data will be sent to the metrics system + on every timer period, whether or not it has been updated since the previous + timer period. If this is inappropriate, for example if metrics were being + reported by some transient object in an application, the remove() + method can be used to remove the row and thus stop the data from being + sent.

+ + Note that the update() method is atomic. This means that it is + safe for different threads to be updating the same metric. More precisely, + it is OK for different threads to call update() on MetricsRecord instances + with the same set of tag names and tag values. Different threads should + not use the same MetricsRecord instance at the same time.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MetricsContext.registerUpdater().]]> + + + + + +The API is abstract so that it can be implemented on top of +a variety of metrics client libraries. The choice of +client library is a configuration option, and different +modules within the same application can use +different metrics implementation libraries. +

+Sub-packages: +

+
org.apache.hadoop.metrics.spi
+
The abstract Server Provider Interface package. Those wishing to + integrate the metrics API with a particular metrics client library should + extend this package.
+ +
org.apache.hadoop.metrics.file
+
An implementation package which writes the metric data to + a file, or sends it to the standard output stream.
+ +
org.apache.hadoop.metrics.ganglia
+
An implementation package which sends metric data to + Ganglia.
+
+ +

Introduction to the Metrics API

+ +Here is a simple example of how to use this package to report a single +metric value: +
+    private ContextFactory contextFactory = ContextFactory.getFactory();
+    
+    void reportMyMetric(float myMetric) {
+        MetricsContext myContext = contextFactory.getContext("myContext");
+        MetricsRecord myRecord = myContext.getRecord("myRecord");
+        myRecord.setMetric("myMetric", myMetric);
+        myRecord.update();
+    }
+
+ +In this example there are three names: +
+
myContext
+
The context name will typically identify either the application, or else a + module within an application or library.
+ +
myRecord
+
The record name generally identifies some entity for which a set of + metrics are to be reported. For example, you could have a record named + "cacheStats" for reporting a number of statistics relating to the usage of + some cache in your application.
+ +
myMetric
+
This identifies a particular metric. For example, you might have metrics + named "cache_hits" and "cache_misses". +
+
+ +

Tags

+ +In some cases it is useful to have multiple records with the same name. For +example, suppose that you want to report statistics about each disk on a computer. +In this case, the record name would be something like "diskStats", but you also +need to identify the disk which is done by adding a tag to the record. +The code could look something like this: +
+    private MetricsRecord diskStats =
+            contextFactory.getContext("myContext").getRecord("diskStats");
+            
+    void reportDiskMetrics(String diskName, float diskBusy, float diskUsed) {
+        diskStats.setTag("diskName", diskName);
+        diskStats.setMetric("diskBusy", diskBusy);
+        diskStats.setMetric("diskUsed", diskUsed);
+        diskStats.update();
+    }
+
+ +

Buffering and Callbacks

+ +Data is not sent immediately to the metrics system when +MetricsRecord.update() is called. Instead it is stored in an +internal table, and the contents of the table are sent periodically. +This can be important for two reasons: +
    +
  1. It means that a programmer is free to put calls to this API in an + inner loop, since updates can be very frequent without slowing down + the application significantly.
  2. +
  3. Some implementations can gain efficiency by combining many metrics + into a single UDP message.
  4. +
+ +The API provides a timer-based callback via the +registerUpdater() method. The benefit of this +versus using java.util.Timer is that the callbacks will be done +immediately before sending the data, making the data as current as possible. + +
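+
+A sketch of such a callback, following the usage pattern shown in the examples
+above (the record and metric names are illustrative; exception handling is
+collapsed into a throws clause):
+
+import org.apache.hadoop.metrics.ContextFactory;
+import org.apache.hadoop.metrics.MetricsContext;
+import org.apache.hadoop.metrics.MetricsRecord;
+import org.apache.hadoop.metrics.Updater;
+
+public class CacheMetricsUpdater implements Updater {
+
+    private MetricsRecord cacheStats;
+    private volatile int hits;   // incremented elsewhere by the application
+
+    public CacheMetricsUpdater() throws Exception {
+        MetricsContext context =
+            ContextFactory.getFactory().getContext("myContext");
+        // Obtain the record as in the examples above (some releases name
+        // this method createRecord rather than getRecord).
+        cacheStats = context.getRecord("cacheStats");
+        // doUpdates() is invoked immediately before each periodic send.
+        context.registerUpdater(this);
+    }
+
+    public void doUpdates(MetricsContext unused) {
+        cacheStats.setMetric("cache_hits", hits);
+        cacheStats.update();
+    }
+}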

Configuration

+ +It is possible to programmatically examine and modify configuration data +before creating a context, like this: +
+    ContextFactory factory = ContextFactory.getFactory();
+    ... examine and/or modify factory attributes ...
+    MetricsContext context = factory.getContext("myContext");
+
+The factory attributes can be examined and modified using the following
+ContextFactory methods:
+
    +
  • Object getAttribute(String attributeName)
  • +
  • String[] getAttributeNames()
  • +
  • void setAttribute(String name, Object value)
  • +
  • void removeAttribute(attributeName)
  • +
+ +

+ContextFactory.getFactory() initializes the factory attributes by +reading the properties file hadoop-metrics.properties if it exists +on the class path. + +

+A factory attribute named: +

+contextName.class
+
+should have as its value the fully qualified name of the class to be
+instantiated by a call to the ContextFactory method
+getContext(contextName). If this factory attribute is not
+specified, the default is to instantiate
+org.apache.hadoop.metrics.file.FileContext.
+

+Other factory attributes are specific to a particular implementation of this +API and are documented elsewhere. For example, configuration attributes for +the file and Ganglia implementations can be found in the javadoc for +their respective packages.]]> + + + + + + + + + + + + + + + + + + + + + + + fileName attribute, + if specified. Otherwise the data will be written to standard + output.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is configured by setting ContextFactory attributes which in turn + are usually configured through a properties file. All the attributes are + prefixed by the contextName. For example, the properties file might contain: +

+ myContextName.fileName=/tmp/metrics.log
+ myContextName.period=5
+ 
]]> + +
+ + + +These are the implementation specific factory attributes +(See ContextFactory.getFactory()): + +
+
contextName.fileName
+
The path of the file to which metrics in context contextName + are to be appended. If this attribute is not specified, the metrics + are written to standard output by default.
+ +
contextName.period
+
The period in seconds on which the metric data is written to the + file.
+ +
]]> +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +Implementation of the metrics package that sends metric data to +Ganglia. +Programmers should not normally need to use this package directly. Instead +they should use org.hadoop.metrics. + +

+These are the implementation specific factory attributes +(See ContextFactory.getFactory()): + +

+
contextName.servers
+
Space and/or comma separated sequence of servers to which UDP + messages should be sent.
+ +
contextName.period
+
The period in seconds on which the metric data is sent to the + server(s).
+ +
contextName.units.recordName.metricName
+
The units for the specified metric in the specified record.
+ +
contextName.slope.recordName.metricName
+
The slope for the specified metric in the specified record.
+ +
contextName.tmax.recordName.metricName
+
The tmax for the specified metric in the specified record.
+ +
contextName.dmax.recordName.metricName
+
The dmax for the specified metric in the specified record.
+ +
]]> +
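+
+For example, a hadoop-metrics.properties fragment that wires a context named
+myContextName to Ganglia might look like the following (the host name and
+port are placeholders for a local gmond endpoint):
+
+ myContextName.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+ myContextName.servers=ganglia-host:8649
+ myContextName.period=10
+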
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + contextName.tableName. The returned map consists of + those attributes with the contextName and tableName stripped off.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class implements the internal table of metric data, and the timer + on which data is to be sent to the metrics system. Subclasses must + override the abstract emitRecord method in order to transmit + the data.

]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + update + and remove().]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @return a list of InetSocketAddress objects.]]> + + + + + + + + + org.apache.hadoop.metrics.file and +org.apache.hadoop.metrics.ganglia.

+ +Plugging in an implementation involves writing a concrete subclass of +AbstractMetricsContext. The subclass should get its + configuration information using the getAttribute(attributeName) + method.]]> + + + + + + + + + + + + + ,name=" + Where the and are the supplied parameters + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.rpc.socket.factory.class.<ClassName>. When no + such parameter exists then fall back on the default socket factory as + configured by hadoop.rpc.socket.factory.class.default. If + this default socket factory is not configured, then fall back on the JVM + default socket factory. + + @param conf the configuration + @param clazz the class (usually a {@link VersionedProtocol}) + @return a socket factory]]> + + + + + + hadoop.rpc.socket.factory.default + + @param conf the configuration + @return the default socket factory as specified in the configuration or + the JVM default socket factory if the configuration does not + contain a default socket factory property.]]> + + + + + + + + + + + + + : + ://:/]]> + + + + + + + + : + ://:/]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
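+
+Returning to the SPI described above, plugging in a new implementation means
+subclassing AbstractMetricsContext and overriding the abstract emitRecord
+method. The sketch below prints each row to standard output; the exact
+emitRecord signature and the OutputRecord accessors used here are assumptions
+based on the description above, not copied from the source.
+
+import java.io.IOException;
+
+import org.apache.hadoop.metrics.ContextFactory;
+import org.apache.hadoop.metrics.spi.AbstractMetricsContext;
+import org.apache.hadoop.metrics.spi.OutputRecord;
+
+public class StdoutContext extends AbstractMetricsContext {
+
+  private String prefix;   // hypothetical attribute: "contextName.prefix"
+
+  public void init(String contextName, ContextFactory factory) {
+    super.init(contextName, factory);
+    // Configuration is read through getAttribute(), as noted above.
+    prefix = getAttribute("prefix");
+  }
+
+  protected void emitRecord(String contextName, String recordName,
+                            OutputRecord outRec) throws IOException {
+    // Called once per row of the internal table on every timer period.
+    for (String metricName : outRec.getMetricNames()) {
+      System.out.println((prefix == null ? "" : prefix + ".")
+          + contextName + "." + recordName + "." + metricName
+          + "=" + outRec.getMetric(metricName));
+    }
+  }
+}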
+ + From documentation for {@link #getInputStream(Socket, long)}:
+ Returns InputStream for the socket. If the socket has an associated
+ SocketChannel then it returns a
+ {@link SocketInputStream} with the given timeout. If the socket does not
+ have a channel, {@link Socket#getInputStream()} is returned. In the latter
+ case, the timeout argument is ignored and the timeout set with
+ {@link Socket#setSoTimeout(int)} applies for reads.

+ + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see #getInputStream(Socket, long) + + @param socket + @return InputStream for reading from the socket. + @throws IOException]]> +
+
+ + + + + +
+ + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return InputStream for reading from the socket. + @throws IOException]]> +
+
+ + + + +
+ + From documentation for {@link #getOutputStream(Socket, long)} :
+ Returns OutputStream for the socket. If the socket has an associated
+ SocketChannel then it returns a
+ {@link SocketOutputStream} with the given timeout. If the socket does not
+ have a channel, {@link Socket#getOutputStream()} is returned. In the latter
+ case, the timeout argument is ignored and the write will wait until
+ data is available.

+ + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see #getOutputStream(Socket, long) + + @param socket + @return OutputStream for writing to the socket. + @throws IOException]]> +
+
+ + + + + +
+ + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return OutputStream for writing to the socket. + @throws IOException]]> +
+
+ + + + + + + socket.connect(endpoint, timeout). If + socket.getChannel() returns a non-null channel, + connect is implemented using Hadoop's selectors. This is done mainly + to avoid Sun's connect implementation from creating thread-local + selectors, since Hadoop does not have control on when these are closed + and could end up taking all the available file descriptors. + + @see java.net.Socket#connect(java.net.SocketAddress, int) + + @param socket + @param endpoint + @param timeout - timeout in milliseconds]]> + + + + + + + + + + + + + + +
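+
+ A small sketch tying the helpers above together (host, port and timeouts
+ are placeholders):
+
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.InetSocketAddress;
+import java.net.Socket;
+
+import org.apache.hadoop.net.NetUtils;
+
+public class NetUtilsExample {
+  public static void main(String[] args) throws Exception {
+    Socket socket = new Socket();
+    // Connect with a 10 second timeout using Hadoop's selector-based connect.
+    NetUtils.connect(socket, new InetSocketAddress("example.com", 8020), 10000);
+    // Obtain streams through NetUtils so channel-backed sockets get
+    // timeout-aware wrappers.
+    InputStream in = NetUtils.getInputStream(socket, 10000);
+    OutputStream out = NetUtils.getOutputStream(socket, 10000);
+    out.write(0);
+    in.read();
+    socket.close();
+  }
+}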
+ + + + + + + + + + + + + + + + + + + + + node + + @param node + a node + @return true if node is already in the tree; false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + scope + if scope starts with ~, choose one from the all nodes except for the + ones in scope; otherwise, choose one from scope + @param scope range of nodes from which a node will be choosen + @return the choosen node]]> + + + + + + + scope but not in excludedNodes + if scope starts with ~, return the number of nodes that are not + in scope and excludedNodes; + @param scope a path string that may start with ~ + @param excludedNodes a list of nodes + @return number of available nodes]]> + + + + + + + + + + + + reader + It linearly scans the array, if a local node is found, swap it with + the first element of the array. + If a local rack node is found, swap it with the first element following + the local node. + If neither local node or local rack node is found, put a random replica + location at postion 0. + It leaves the rest nodes untouched.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ Create a new input stream with the given timeout. If the timeout
+ is zero, it will be treated as an infinite timeout. The socket's
+ channel will be configured to be non-blocking.
+
+ @see SocketInputStream#SocketInputStream(ReadableByteChannel, long)
+
+ @param socket should have a channel associated with it.
+ @param timeout timeout in milliseconds; must not be negative.
+ @throws IOException]]>
+
+
+ + + +
+ + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @throws IOException]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + +
+
+ Create a new output stream with the given timeout. If the timeout
+ is zero, it will be treated as an infinite timeout. The socket's
+ channel will be configured to be non-blocking.
+
+ @see SocketOutputStream#SocketOutputStream(WritableByteChannel, long)
+
+ @param socket should have a channel associated with it.
+ @param timeout timeout in milliseconds; must not be negative.
+ @throws IOException]]>
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = getCount(). + @param newCapacity The new capacity in bytes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index idx = startVector(...); + while (!idx.done()) { + .... // read element of a vector + idx.incr(); + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Introduction + + Software systems of any significant complexity require mechanisms for data +interchange with the outside world. These interchanges typically involve the +marshaling and unmarshaling of logical units of data to and from data streams +(files, network connections, memory buffers etc.). Applications usually have +some code for serializing and deserializing the data types that they manipulate +embedded in them. The work of serialization has several features that make +automatic code generation for it worthwhile. Given a particular output encoding +(binary, XML, etc.), serialization of primitive types and simple compositions +of primitives (structs, vectors etc.) is a very mechanical task. Manually +written serialization code can be susceptible to bugs especially when records +have a large number of fields or a record definition changes between software +versions. Lastly, it can be very useful for applications written in different +programming languages to be able to share and interchange data. This can be +made a lot easier by describing the data records manipulated by these +applications in a language agnostic manner and using the descriptions to derive +implementations of serialization in multiple target languages. + +This document describes Hadoop Record I/O, a mechanism that is aimed +at +

    +
  • enabling the specification of simple serializable data types (records) +
  • enabling the generation of code in multiple target languages for +marshaling and unmarshaling such types +
  • providing target language specific support that will enable application +programmers to incorporate generated code into their applications +
+ +The goals of Hadoop Record I/O are similar to those of mechanisms such as XDR, +ASN.1, PADS and ICE. While these systems all include a DDL that enables +the specification of most record types, they differ widely in what else they +focus on. The focus in Hadoop Record I/O is on data marshaling and +multi-lingual support. We take a translator-based approach to serialization. +Hadoop users have to describe their data in a simple data description +language. The Hadoop DDL translator rcc generates code that users +can invoke in order to read/write their data from/to simple stream +abstractions. Next we list explicitly some of the goals and non-goals of +Hadoop Record I/O. + + +

Goals

+ +
    +
  • Support for commonly used primitive types. Hadoop should include as +primitives commonly used builtin types from programming languages we intend to +support. + +
  • Support for common data compositions (including recursive compositions). +Hadoop should support widely used composite types such as structs and +vectors. + +
  • Code generation in multiple target languages. Hadoop should be capable of +generating serialization code in multiple target languages and should be +easily extensible to new target languages. The initial target languages are +C++ and Java. + +
  • Support for generated target languages. Hadoop should include support
+in the form of headers, libraries, and packages for supported target languages
+that enable easy inclusion and use of generated code in applications.
+
  • Support for multiple output encodings. Candidates include +packed binary, comma-separated text, XML etc. + +
  • Support for specifying record types in a backwards/forwards compatible
+manner. This will probably be in the form of support for optional fields in
+records. This version of the document does not include a description of the
+planned mechanism; we intend to include it in the next iteration.
+
+ +

Non-Goals

+ +
    +
  • Serializing existing arbitrary C++ classes. +
  • Serializing complex data structures such as trees, linked lists etc. +
  • Built-in indexing schemes, compression, or check-sums. +
  • Dynamic construction of objects from an XML schema. +
+ +The remainder of this document describes the features of Hadoop record I/O +in more detail. Section 2 describes the data types supported by the system. +Section 3 lays out the DDL syntax with some examples of simple records. +Section 4 describes the process of code generation with rcc. Section 5 +describes target language mappings and support for Hadoop types. We include a +fairly complete description of C++ mappings with intent to include Java and +others in upcoming iterations of this document. The last section talks about +supported output encodings. + + +

Data Types and Streams

+ +This section describes the primitive and composite types supported by Hadoop. +We aim to support a set of types that can be used to simply and efficiently +express a wide range of record types in different programming languages. + +

Primitive Types

+ +For the most part, the primitive types of Hadoop map directly to primitive +types in high level programming languages. Special cases are the +ustring (a Unicode string) and buffer types, which we believe +find wide use and which are usually implemented in library code and not +available as language built-ins. Hadoop also supplies these via library code +when a target language built-in is not present and there is no widely +adopted "standard" implementation. The complete list of primitive types is: + +
    +
  • byte: An 8-bit unsigned integer. +
  • boolean: A boolean value. +
  • int: A 32-bit signed integer. +
  • long: A 64-bit signed integer. +
  • float: A single precision floating point number as described by + IEEE-754. +
  • double: A double precision floating point number as described by + IEEE-754. +
  • ustring: A string consisting of Unicode characters. +
  • buffer: An arbitrary sequence of bytes. +
+ + +

Composite Types

+Hadoop supports a small set of composite types that enable the description
+of simple aggregate types and containers. A composite type is serialized
+by sequentially serializing its constituent elements. The supported
+composite types are:
+
    + +
  • record: An aggregate type like a C-struct. This is a list of
+typed fields that are together considered a single unit of data. A record
+is serialized by sequentially serializing its constituent fields. In addition
+to serialization, a record has comparison operations (equality and less-than)
+implemented for it; these are defined as memberwise comparisons.
+
  • vector: A sequence of entries of the same data type, primitive +or composite. + +
  • map: An associative container mapping instances of a key type to +instances of a value type. The key and value types may themselves be primitive +or composite types. + +
+ +

Streams

+ +Hadoop generates code for serializing and deserializing record types to +abstract streams. For each target language Hadoop defines very simple input +and output stream interfaces. Application writers can usually develop +concrete implementations of these by putting a one method wrapper around +an existing stream implementation. + + +

DDL Syntax and Examples

+ +We now describe the syntax of the Hadoop data description language. This is +followed by a few examples of DDL usage. + +

Hadoop DDL Syntax

+ +

+recfile = *include module *record
+include = "include" path
+path = (relative-path / absolute-path)
+module = "module" module-name
+module-name = name *("." name)
+record := "class" name "{" 1*(field) "}"
+field := type name ";"
+name :=  ALPHA (ALPHA / DIGIT / "_" )*
+type := (ptype / ctype)
+ptype := ("byte" / "boolean" / "int" /
+          "long" / "float" / "double" /
+          "ustring" / "buffer")
+ctype := (("vector" "<" type ">") /
+          ("map" "<" type "," type ">") /
+          name)
+
+ +A DDL file describes one or more record types. It begins with zero or +more include declarations, a single mandatory module declaration +followed by zero or more class declarations. The semantics of each of +these declarations are described below: + +
    + +
  • include: An include declaration specifies a DDL file to be +referenced when generating code for types in the current DDL file. Record types +in the current compilation unit may refer to types in all included files. +File inclusion is recursive. An include does not trigger code +generation for the referenced file. + +
  • module: Every Hadoop DDL file must have a single module +declaration that follows the list of includes and precedes all record +declarations. A module declaration identifies a scope within which +the names of all types in the current file are visible. Module names are +mapped to C++ namespaces, Java packages etc. in generated code. + +
  • class: Records types are specified through class +declarations. A class declaration is like a Java class declaration. +It specifies a named record type and a list of fields that constitute records +of the type. Usage is illustrated in the following examples. + +
+ +

Examples

+ +
    +
  • A simple DDL file links.jr with just one record declaration. +
    
    +module links {
    +    class Link {
    +        ustring URL;
    +        boolean isRelative;
    +        ustring anchorText;
    +    };
    +}
    +
    + +
  • A DDL file outlinks.jr which includes another +
    
    +include "links.jr"
    +
    +module outlinks {
    +    class OutLinks {
    +        ustring baseURL;
    +        vector<links.Link> outLinks;
    +    };
    +}
    +
    +
+ +

Code Generation

+
+The Hadoop translator is written in Java. Invocation is done by executing a
+wrapper shell script named rcc. It takes a list of
+record description files as a mandatory argument and an
+optional language argument, --language or
+-l (the default is Java). Thus a typical invocation would look like:
+

+$ rcc -l C++  ...
+
+ + +

Target Language Mappings and Support

+ +For all target languages, the unit of code generation is a record type. +For each record type, Hadoop generates code for serialization and +deserialization, record comparison and access to record members. + +

C++

+ +Support for including Hadoop generated C++ code in applications comes in the +form of a header file recordio.hh which needs to be included in source +that uses Hadoop types and a library librecordio.a which applications need +to be linked with. The header declares the Hadoop C++ namespace which defines +appropriate types for the various primitives, the basic interfaces for +records and streams and enumerates the supported serialization encodings. +Declarations of these interfaces and a description of their semantics follow: + +

+namespace hadoop {
+
+  enum RecFormat { kBinary, kXML, kCSV };
+
+  class InStream {
+  public:
+    virtual ssize_t read(void *buf, size_t n) = 0;
+  };
+
+  class OutStream {
+  public:
+    virtual ssize_t write(const void *buf, size_t n) = 0;
+  };
+
+  class IOError : public runtime_error {
+  public:
+    explicit IOError(const std::string& msg);
+  };
+
+  class IArchive;
+  class OArchive;
+
+  class RecordReader {
+  public:
+    RecordReader(InStream& in, RecFormat fmt);
+    virtual ~RecordReader(void);
+
+    virtual void read(Record& rec);
+  };
+
+  class RecordWriter {
+  public:
+    RecordWriter(OutStream& out, RecFormat fmt);
+    virtual ~RecordWriter(void);
+
+    virtual void write(Record& rec);
+  };
+
+
+  class Record {
+  public:
+    virtual std::string type(void) const = 0;
+    virtual std::string signature(void) const = 0;
+  protected:
+    virtual bool validate(void) const = 0;
+
+    virtual void
+    serialize(OArchive& oa, const std::string& tag) const = 0;
+
+    virtual void
+    deserialize(IArchive& ia, const std::string& tag) = 0;
+  };
+}
+
+ +
    + +
  • RecFormat: An enumeration of the serialization encodings supported +by this implementation of Hadoop. + +
  • InStream: A simple abstraction for an input stream. This has a +single public read method that reads n bytes from the stream into +the buffer buf. Has the same semantics as a blocking read system +call. Returns the number of bytes read or -1 if an error occurs. + +
  • OutStream: A simple abstraction for an output stream. This has a +single write method that writes n bytes to the stream from the +buffer buf. Has the same semantics as a blocking write system +call. Returns the number of bytes written or -1 if an error occurs. + +
  • RecordReader: A RecordReader reads records one at a time from +an underlying stream in a specified record format. The reader is instantiated +with a stream and a serialization format. It has a read method that +takes an instance of a record and deserializes the record from the stream. + +
  • RecordWriter: A RecordWriter writes records one at a +time to an underlying stream in a specified record format. The writer is +instantiated with a stream and a serialization format. It has a +write method that takes an instance of a record and serializes the +record to the stream. + +
  • Record: The base class for all generated record types. This has two +public methods type and signature that return the typename and the +type signature of the record. + +
+
+Two files are generated for each record file (note: not for each record). If a
+record file is named "name.jr", the generated files are
+"name.jr.cc" and "name.jr.hh", containing the serialization
+implementations and the record type declarations respectively.
+
+For each record in the DDL file, the generated header file contains a
+class definition corresponding to the record type; the method definitions for
+the generated type are placed in the '.cc' file. The generated class
+inherits from the abstract class hadoop::Record. The DDL file's
+module declaration determines the namespace the record belongs to.
+Each '.'-delimited token in the module declaration results in the
+creation of a namespace. For instance, the declaration module docs.links
+results in the creation of a docs namespace and a nested
+docs::links namespace. In the preceding examples, the Link class
+is placed in the links namespace. The header file corresponding to
+the links.jr file will contain:
+

+namespace links {
+  class Link : public hadoop::Record {
+    // ....
+  };
+};
+
+
+Each field within the record will cause the generation of a private member
+declaration of the appropriate type in the class declaration, and one or more
+accessor methods. The generated class will implement the serialize and
+deserialize methods defined in hadoop::Record. It will also
+implement the inspection methods type and signature from
+hadoop::Record. A default constructor and virtual destructor will also
+be generated. Serialization code will read/write records into streams that
+implement the hadoop::InStream and the hadoop::OutStream interfaces.
+
+For each member of a record an accessor method is generated that returns
+either the member or a reference to the member. For members that are returned
+by value, a setter method is also generated. This is true for primitive
+data members of the types byte, int, long, boolean, float and
+double. For example, for an int field called MyField the following
+code is generated.
+

+...
+private:
+  int32_t mMyField;
+  ...
+public:
+  int32_t getMyField(void) const {
+    return mMyField;
+  };
+
+  void setMyField(int32_t m) {
+    mMyField = m;
+  };
+  ...
+
+
+For a ustring, buffer, or composite field, the generated code
+contains only accessors that return a reference to the field. Both a const
+and a non-const accessor are generated. For example:
+

+...
+private:
+  std::string mMyBuf;
+  ...
+public:
+
+  std::string& getMyBuf() {
+    return mMyBuf;
+  };
+
+  const std::string& getMyBuf() const {
+    return mMyBuf;
+  };
+  ...
+
+ +

Examples

+ +Suppose the inclrec.jr file contains: +

+module inclrec {
+    class RI {
+        int      I32;
+        double   D;
+        ustring  S;
+    };
+}
+
+ +and the testrec.jr file contains: + +

+include "inclrec.jr"
+module testrec {
+    class R {
+        vector<float>  VF;
+        RI            Rec;
+        buffer        Buf;
+    };
+}
+
+ +Then the invocation of rcc such as: +

+$ rcc -l c++ inclrec.jr testrec.jr
+
+will result in generation of four files: +inclrec.jr.{cc,hh} and testrec.jr.{cc,hh}. + +The inclrec.jr.hh will contain: + +

+#ifndef _INCLREC_JR_HH_
+#define _INCLREC_JR_HH_
+
+#include "recordio.hh"
+
+namespace inclrec {
+  
+  class RI : public hadoop::Record {
+
+  private:
+
+    int32_t      I32;
+    double       D;
+    std::string  S;
+
+  public:
+
+    RI(void);
+    virtual ~RI(void);
+
+    virtual bool operator==(const RI& peer) const;
+    virtual bool operator<(const RI& peer) const;
+
+    virtual int32_t getI32(void) const { return I32; }
+    virtual void setI32(int32_t v) { I32 = v; }
+
+    virtual double getD(void) const { return D; }
+    virtual void setD(double v) { D = v; }
+
+    virtual std::string& getS(void) { return S; }
+    virtual const std::string& getS(void) const { return S; }
+
+    virtual std::string type(void) const;
+    virtual std::string signature(void) const;
+
+  protected:
+
+    virtual void serialize(hadoop::OArchive& a) const;
+    virtual void deserialize(hadoop::IArchive& a);
+  };
+} // end namespace inclrec
+
+#endif /* _INCLREC_JR_HH_ */
+
+
+ +The testrec.jr.hh file will contain: + + +

+
+#ifndef _TESTREC_JR_HH_
+#define _TESTREC_JR_HH_
+
+#include "inclrec.jr.hh"
+
+namespace testrec {
+  class R : public hadoop::Record {
+
+  private:
+
+    std::vector<float> VF;
+    inclrec::RI        Rec;
+    std::string        Buf;
+
+  public:
+
+    R(void);
+    virtual ~R(void);
+
+    virtual bool operator==(const R& peer) const;
+    virtual bool operator<(const R& peer) const;
+
+    virtual std::vector<float>& getVF(void);
+    virtual const std::vector<float>& getVF(void) const;
+
+    virtual std::string& getBuf(void);
+    virtual const std::string& getBuf(void) const;
+
+    virtual inclrec::RI& getRec(void);
+    virtual const inclrec::RI& getRec(void) const;
+    
+    virtual bool serialize(hadoop::OutArchive& a) const;
+    virtual bool deserialize(hadoop::InArchive& a);
+    
+    virtual std::string type(void) const;
+    virtual std::string signature(void) const;
+  };
+}; // end namespace testrec
+#endif /* _TESTREC_JR_HH_ */
+
+
+ +

Java

+ +Code generation for Java is similar to that for C++. A Java class is generated +for each record type with private members corresponding to the fields. Getters +and setters for fields are also generated. Some differences arise in the +way comparison is expressed and in the mapping of modules to packages and +classes to files. For equality testing, an equals method is generated +for each record type. As per Java requirements a hashCode method is also +generated. For comparison a compareTo method is generated for each +record type. This has the semantics as defined by the Java Comparable +interface, that is, the method returns a negative integer, zero, or a positive +integer as the invoked object is less than, equal to, or greater than the +comparison parameter. + +A .java file is generated per record type as opposed to per DDL +file as in C++. The module declaration translates to a Java +package declaration. The module name maps to an identical Java package +name. In addition to this mapping, the DDL compiler creates the appropriate +directory hierarchy for the package and places the generated .java +files in the correct directories. + +
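+
+For orientation, the following is a hypothetical outline of what a generated
+Java class for the Link record above might look like; the real generated code
+differs in detail and also carries serialization support.
+
+package links;
+
+public class Link implements Comparable<Link> {
+
+  private String URL;
+  private boolean isRelative;
+  private String anchorText;
+
+  public String getURL() { return URL; }
+  public void setURL(String url) { this.URL = url; }
+  // ... analogous getters and setters for the remaining fields ...
+
+  public int compareTo(Link peer) {
+    // Memberwise comparison; only the first field is shown here.
+    return URL.compareTo(peer.URL);
+  }
+
+  public boolean equals(Object o) {
+    if (!(o instanceof Link)) {
+      return false;
+    }
+    Link other = (Link) o;
+    return URL.equals(other.URL)
+        && isRelative == other.isRelative
+        && anchorText.equals(other.anchorText);
+  }
+
+  public int hashCode() {
+    return 31 * URL.hashCode() + anchorText.hashCode();
+  }
+}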

Mapping Summary

+ +

+DDL Type        C++ Type            Java Type 
+
+boolean         bool                boolean
+byte            int8_t              byte
+int             int32_t             int
+long            int64_t             long
+float           float               float
+double          double              double
+ustring         std::string         java.lang.String
+buffer          std::string         org.apache.hadoop.record.Buffer
+class type      class type          class type
+vector<type>    std::vector<type>   java.util.ArrayList<type>
+map<key,value>  std::map<key,value> java.util.TreeMap<key,value>
+
+ +

Data encodings

+ +This section describes the format of the data encodings supported by Hadoop. +Currently, three data encodings are supported, namely binary, CSV and XML. + +

Binary Serialization Format

+ +The binary data encoding format is fairly dense. Serialization of composite +types is simply defined as a concatenation of serializations of the constituent +elements (lengths are included in vectors and maps). + +Composite types are serialized as follows: +
    +
  • class: Sequence of serialized members. +
  • vector: The number of elements serialized as an int. Followed by a +sequence of serialized elements. +
  • map: The number of key value pairs serialized as an int. Followed +by a sequence of serialized (key,value) pairs. +
+ +Serialization of primitives is more interesting, with a zero compression +optimization for integral types and normalization to UTF-8 for strings. +Primitive types are serialized as follows: + +
    +
  • byte: Represented by 1 byte, as is. +
  • boolean: Represented by 1-byte (0 or 1) +
  • int/long: Integers and longs are serialized zero compressed. +Represented as 1-byte if -120 <= value < 128. Otherwise, serialized as a +sequence of 2-5 bytes for ints, 2-9 bytes for longs. The first byte represents +the number of trailing bytes, N, as the negative number (-120-N). For example, +the number 1024 (0x400) is represented by the byte sequence 'x86 x04 x00'. +This doesn't help much for 4-byte integers but does a reasonably good job with +longs without bit twiddling. +
  • float/double: Serialized in IEEE 754 single and double precision +format in network byte order. This is the format used by Java. +
  • ustring: Serialized as 4-byte zero compressed length followed by +data encoded as UTF-8. Strings are normalized to UTF-8 regardless of native +language representation. +
  • buffer: Serialized as a 4-byte zero compressed length followed by the +raw bytes in the buffer. +
+ + +
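+
+As a concrete illustration of the integer encoding just described, here is a
+small standalone encoder (a sketch written from the description above, not the
+class Hadoop itself uses; large negative values are omitted for brevity):
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+public class ZeroCompressedInt {
+
+  public static void writeInt(DataOutputStream out, int i) throws IOException {
+    if (i >= -120 && i < 128) {
+      out.writeByte(i);                      // small values fit in one byte
+      return;
+    }
+    if (i < 0) {
+      throw new IllegalArgumentException("sketch handles non-negative values");
+    }
+    int n = 0;                               // number of trailing bytes needed
+    for (int v = i; v != 0; v >>>= 8) {
+      n++;
+    }
+    out.writeByte(-120 - n);                 // marker byte: -(120 + N)
+    for (int shift = (n - 1) * 8; shift >= 0; shift -= 8) {
+      out.writeByte((i >>> shift) & 0xff);   // big-endian payload
+    }
+  }
+
+  public static void main(String[] args) throws IOException {
+    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+    writeInt(new DataOutputStream(bytes), 1024);
+    for (byte b : bytes.toByteArray()) {
+      System.out.printf("%02x ", b);         // prints: 86 04 00
+    }
+  }
+}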

CSV Serialization Format

+ +The CSV serialization format has a lot more structure than the "standard" +Excel CSV format, but we believe the additional structure is useful because + +
    +
  • it makes parsing a lot easier without detracting too much from legibility +
  • the delimiters around composites make it obvious when one is reading a +sequence of Hadoop records +
+
+Serialization formats for the various types are detailed in the grammar that
+follows. The notable feature of the formats is the use of delimiters to
+indicate certain field types.
+
    +
  • A string field begins with a single quote ('). +
  • A buffer field begins with a sharp (#). +
  • A class, vector or map begins with 's{', 'v{' or 'm{' respectively and +ends with '}'. +
+ +The CSV format can be described by the following grammar: + +

+record = primitive / struct / vector / map
+primitive = boolean / int / long / float / double / ustring / buffer
+
+boolean = "T" / "F"
+int = ["-"] 1*DIGIT
+long = ";" ["-"] 1*DIGIT
+float = ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
+double = ";" ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
+
+ustring = "'" *(UTF8 char except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
+
+buffer = "#" *(BYTE except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
+
+struct = "s{" record *("," record) "}"
+vector = "v{" [record *("," record)] "}"
+map = "m{" [*(record "," record)] "}"
+
+ +
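+
+For instance, applying this grammar to the Link record from the DDL examples
+above, one possible CSV serialization (an illustration with made-up field
+values, not generated output) is:
+
+s{'http://hadoop.apache.org/,F,'Apache Hadoop}
+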

XML Serialization Format

+
+The XML serialization format is the same as that used by Apache XML-RPC
+(http://ws.apache.org/xmlrpc/types.html). This is an extension of the original
+XML-RPC format and adds some additional data types. Not all record I/O types
+are directly expressible in this format, and access to a DDL is required in
+order to convert these to valid types. All types, primitive or composite, are
+represented by <value> elements. The particular XML-RPC type is
+indicated by a nested element in the <value> element. The encoding for
+records is always UTF-8. Primitive types are serialized as follows:
+
    +
  • byte: XML tag <ex:i1>. Values: 1-byte unsigned +integers represented in US-ASCII +
  • boolean: XML tag <boolean>. Values: "0" or "1" +
  • int: XML tags <i4> or <int>. Values: 4-byte +signed integers represented in US-ASCII. +
  • long: XML tag <ex:i8>. Values: 8-byte signed integers +represented in US-ASCII. +
  • float: XML tag <ex:float>. Values: Single precision +floating point numbers represented in US-ASCII. +
  • double: XML tag <double>. Values: Double precision +floating point numbers represented in US-ASCII. +
  • ustring: XML tag <string>. Values: String values
+represented as UTF-8. XML does not permit all Unicode characters in literal
+data. In particular, NULLs and control chars are not allowed. Additionally,
+XML processors are required to replace carriage returns with line feeds and to
+replace CRLF sequences with line feeds. Programming languages that we work
+with do not impose these restrictions on string types. To work around these
+restrictions, disallowed characters and CRs are percent escaped in strings.
+The '%' character is also percent escaped.
  • buffer: XML tag <string>. Values: Arbitrary binary
+data, represented as hexBinary: each byte is replaced by its 2-byte
+hexadecimal representation.
+ +Composite types are serialized as follows: + +
    +
  • class: XML tag <struct>. A struct is a sequence of +<member> elements. Each <member> element has a <name> +element and a <value> element. The <name> is a string that must +match /[a-zA-Z][a-zA-Z0-9_]*/. The value of the member is represented +by a <value> element. + +
  • vector: XML tag <array>. An <array> contains a
+single <data> element. The <data> element is a sequence of
+<value> elements each of which represents an element of the vector.
+
  • map: XML tag <array>. Same as vector. + +
+ +For example: + +

+class {
+  int           MY_INT;            // value 5
+  vector<float> MY_VEC;     // values 0.1, -0.89, 2.45e4
+  buffer        MY_BUF;            // value '\00\n\tabc%'
+}
+
+ +is serialized as + +

+<value>
+  <struct>
+    <member>
+      <name>MY_INT</name>
+      <value><i4>5</i4></value>
+    </member>
+    <member>
+      <name>MY_VEC</name>
+      <value>
+        <array>
+          <data>
+            <value><ex:float>0.1</ex:float></value>
+            <value><ex:float>-0.89</ex:float></value>
+            <value><ex:float>2.45e4</ex:float></value>
+          </data>
+        </array>
+      </value>
+    </member>
+    <member>
+      <name>MY_BUF</name>
+      <value><string>%00\n\tabc%25</string></value>
+    </member>
+  </struct>
+</value> 
+
]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This task takes the given record definition files and compiles them into + java or c++ + files. It is then up to the user to compile the generated files. + +

+ The task requires the file or the nested fileset element to be
+ specified. Optional attributes are language (sets the output
+ language; default is "java"),
+ destdir (name of the destination directory for the generated java/c++
+ code; default is ".") and failonerror (specifies error handling
+ behavior; default is true).
+

Usage

+
+ <recordcc
+       destdir="${basedir}/gensrc"
+       language="java">
+   <fileset include="**\/*.jr" />
+ </recordcc>
+ 
]]> +
+
+ +
+ + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + Group with the given groupname. + @param group group name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi. + @param ugi user + @return the {@link Subject} for the user identified by ugi]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi as a comma separated string in + conf as a property attr + + The String starts with the user name followed by the default group names, + and other group names. + + @param conf configuration + @param attr property name + @param ugi a UnixUserGroupInformation]]> + + + + + + + + conf + + The object is expected to store with the property name attr + as a comma separated string that starts + with the user name followed by group names. + If the property name is not defined, return null. + It's assumed that there is only one UGI per user. If this user already + has a UGI in the ugi map, return the ugi in the map. + Otherwise, construct a UGI from the configuration, store it in the + ugi map and return it. + + @param conf configuration + @param attr property name + @return a UnixUGI + @throws LoginException if the stored string is ill-formatted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + User with the given username. + @param user user name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). 
+ @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + does not provide the stack trace for security purposes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + service as related to + Service Level Authorization for Hadoop. + + Each service defines it's configuration key and also the necessary + {@link Permission} required to access the service.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + in]]> + + + + + + + out.]]> + + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GenericOptionsParser to parse only the generic Hadoop + arguments. + + The array of string arguments other than the generic arguments can be + obtained by {@link #getRemainingArgs()}. + + @param conf the Configuration to modify. + @param args command-line arguments.]]> + + + + + GenericOptionsParser to parse given options as well + as generic Hadoop options. + + The resulting CommandLine object can be obtained by + {@link #getCommandLine()}. + + @param conf the configuration to modify + @param options options built by the caller + @param args User-specified arguments]]> + + + + + Strings containing the un-parsed arguments + or empty array if commandLine was not defined.]]> + + + + + + + + + + CommandLine object + to process the parsed arguments. + + Note: If the object is created with + {@link #GenericOptionsParser(Configuration, String[])}, then returned + object will only contain parsed generic options. + + @return CommandLine representing list of arguments + parsed against Options descriptor.]]> + + + + + + + + + + + + + + + + + GenericOptionsParser is a utility to parse command line + arguments generic to the Hadoop framework. + + GenericOptionsParser recognizes several standarad command + line arguments, enabling applications to easily specify a namenode, a + jobtracker, additional configuration resources etc. + +

Generic Options

+ +

The supported generic options are:

+

+     -conf <configuration file>     specify a configuration file
+     -D <property=value>            use value for given property
+     -fs <local|namenode:port>      specify a namenode
+     -jt <local|jobtracker:port>    specify a job tracker
+     -files <comma separated list of files>    specify comma separated
+                            files to be copied to the map reduce cluster
+     -libjars <comma separated list of jars>   specify comma separated
+                            jar files to include in the classpath.
+     -archives <comma separated list of archives>    specify comma
+             separated archives to be unarchived on the compute machines.
+
+ 

+ +

The general command line syntax is:

+

+ bin/hadoop command [genericOptions] [commandOptions]
+ 

+ +

Generic command line arguments might modify + Configuration objects, given to constructors.

+ +

The functionality is implemented using Commons CLI.

+ +

Examples:

+

+ $ bin/hadoop dfs -fs darwin:8020 -ls /data
+ list /data directory in dfs with namenode darwin:8020
+ 
+ $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
+ list /data directory in dfs with namenode darwin:8020
+     
+ $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
+ list /data directory in dfs with conf specified in hadoop-site.xml
+     
+ $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
+ submit a job to job tracker darwin:50020
+     
+ $ bin/hadoop job -jt darwin:50020 -submit job.xml
+ submit a job to job tracker darwin:50020
+     
+ $ bin/hadoop job -jt local -submit job.xml
+ submit a job to local runner
+ 
+ $ bin/hadoop jar -libjars testlib.jar 
+ -archives test.tgz -files file.txt inputjar args
+ job submission with libjars, files and archives
+ 

+ + @see Tool + @see ToolRunner]]> +
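+ A minimal sketch (an editor's addition, not part of the original javadoc) of
+ how an application might use the GenericOptionsParser constructor and
+ getRemainingArgs() described above; variable names are illustrative:
+
+     Configuration conf = new Configuration();
+     GenericOptionsParser parser = new GenericOptionsParser(conf, args);
+     // conf now reflects -D, -conf, -fs, -jt, -files, -libjars and -archives
+     String[] appArgs = parser.getRemainingArgs();
+     // appArgs holds only the application-specific arguments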
+
+ + + + + + + + + Class<T>) of the + argument of type T. + @param The type of the argument + @param t the object to get it class + @return Class<T>]]> + + + + + + + List<T> to a an array of + T[]. + @param c the Class object of the items in the list + @param list the list to convert]]> + + + + + + List<T> to a an array of + T[]. + @param list the list to convert + @throws ArrayIndexOutOfBoundsException if the list is empty. + Use {@link #toArray(Class, List)} if the list may be empty.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.file.buffer.size specified in the given + Configuration. + @param in input stream + @param conf configuration + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-hadoop is loaded, + else false]]> + + + + + + true if native hadoop libraries, if present, can be + used for this job; false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { pq.top().change(); pq.adjustTop(); } + instead of
+  { o = pq.pop(); o.change(); pq.push(o); }
+ 
]]> +
+
+ + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take an insignificant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occured and time-out the operation.

]]> +
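+ A hypothetical fragment (not from the original javadoc) showing one common
+ way a Progressable is supplied, here to FileSystem#create so the framework
+ sees liveness during a slow write; fs and the path are assumptions:
+
+     FSDataOutputStream out = fs.create(new Path("/tmp/slow-output"),
+         new Progressable() {
+           public void progress() {
+             // invoked periodically; signals that the operation is still alive
+           }
+         });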
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop Pipes + or Hadoop Streaming. + + It also checks to ensure that we are running on a *nix platform else + (e.g. in Cygwin/Windows) it returns null. + @param conf configuration + @return a String[] with the ulimit command arguments or + null if we are running on a non *nix platform or + if the limit is unspecified.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell can be used to run unix commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShellCommandExecutorshould be used in cases where the output + of the command needs no explicit parsing and where the command, working + directory and the environment remains unchanged. The output of the command + is stored as-is and is expected to be small.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ArrayList of string values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the char to be escaped + @return an escaped string]]> + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the escaped char + @return an unescaped string]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

+ +

Here is how a typical Tool is implemented:

+

+     public class MyApp extends Configured implements Tool {
+     
+       public int run(String[] args) throws Exception {
+         // Configuration processed by ToolRunner
+         Configuration conf = getConf();
+         
+         // Create a JobConf using the processed conf
+         JobConf job = new JobConf(conf, MyApp.class);
+         
+         // Process custom command-line options
+         Path in = new Path(args[1]);
+         Path out = new Path(args[2]);
+         
+         // Specify various job-specific parameters     
+         job.setJobName("my-app");
+         job.setInputPath(in);
+         job.setOutputPath(out);
+         job.setMapperClass(MyApp.MyMapper.class);
+         job.setReducerClass(MyApp.MyReducer.class);
+
+         // Submit the job, then poll for progress until the job is complete
+         JobClient.runJob(job);
+         return 0;
+       }
+       
+       public static void main(String[] args) throws Exception {
+         // Let ToolRunner handle generic command-line options 
+         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
+         
+         System.exit(res);
+       }
+     }
+ 

+ + @see GenericOptionsParser + @see ToolRunner]]> +
+
+ + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

+ + @see Tool + @see GenericOptionsParser]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bloom filter, as defined by Bloom in 1970. +

+ The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + the networking research community in the past decade thanks to the bandwidth efficiencies that it + offers for the transmission of set membership information between networked hosts. A sender encodes + the information into a bit vector, the Bloom filter, that is more compact than a conventional + representation. Computation and space costs for construction are linear in the number of elements. + The receiver uses the filter to test whether various elements are members of the set. Though the + filter will occasionally return a false positive, it will never return a false negative. When creating + the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + +
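+ A minimal usage sketch (an editor's addition, not part of the original text),
+ assuming the org.apache.hadoop.util.bloom API documented here; the vector
+ size and hash count are illustrative:
+
+     BloomFilter filter = new BloomFilter(1024 /* vectorSize */, 5 /* nbHash */,
+                                          Hash.MURMUR_HASH);
+     filter.add(new Key("alice".getBytes()));
+     filter.membershipTest(new Key("alice".getBytes())); // true
+     filter.membershipTest(new Key("bob".getBytes()));   // false, or a false positive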

+ Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]> + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this counting Bloom filter. +

+ Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + @param key The key to remove.]]> + + + + + + + + + + + + key -> count map. +

NOTE: due to the bucket size of this filter, inserting the same + key more than 15 times will cause an overflow at all filter positions + associated with this key, and it will significantly increase the error + rate for this and other keys. For this reason the filter can only be + used to store small count values 0 <= N << 15. + @param key key to be tested + @return 0 if the key is not present. Otherwise, a positive value v will + be returned such that v == count with probability equal to the + error rate of this filter, and v > count otherwise. + Additionally, if the filter experienced an underflow as a result of + {@link #delete(Key)} operation, the return value may be lower than the + count with the probability of the false negative rate of such + filter.]]> + + + + + + + + + + + + + + + + + + + + + + counting Bloom filter, as defined by Fan et al. in a ToN + 2000 paper. +

+ A counting Bloom filter is an improvement on the standard Bloom filter, as it + allows dynamic additions and deletions of set membership information. This + is achieved through the use of a counting vector instead of a bit vector. +
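+ A small sketch of the counting behaviour described here (an editor's
+ addition, not part of the original javadoc); parameters are illustrative:
+
+     CountingBloomFilter cbf = new CountingBloomFilter(1024, 5, Hash.MURMUR_HASH);
+     Key k = new Key("item".getBytes());
+     cbf.add(k);
+     cbf.add(k);
+     cbf.approximateCount(k); // roughly 2, subject to the error rate noted above
+     cbf.delete(k);           // deletion is possible because counters are used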

+ Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Summary cache: a scalable wide-area web cache sharing protocol]]> + + + + + + + + + + + + + + Builds an empty Dynamic Bloom filter. + @param vectorSize The number of bits in the vector. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}). + @param nr The threshold for the maximum number of keys to record in a + dynamic Bloom filter row.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + dynamic Bloom filter, as defined in the INFOCOM 2006 paper. +

+ A dynamic Bloom filter (DBF) makes use of an s * m bit matrix but + each of the s rows is a standard Bloom filter. The creation + process of a DBF is iterative. At the start, the DBF is a 1 * m + bit matrix, i.e., it is composed of a single standard Bloom filter. + It assumes that nr elements are recorded in the + initial bit vector, where nr <= n (n is + the cardinality of the set A to record in the filter). +

+ As the size of A grows during the execution of the application, + several keys must be inserted in the DBF. When inserting a key into the DBF, + one must first get an active Bloom filter in the matrix. A Bloom filter is + active when the number of recorded keys, nr, is + strictly less than the current cardinality of A, n. + If an active Bloom filter is found, the key is inserted and + nr is incremented by one. On the other hand, if there + is no active Bloom filter, a new one is created (i.e., a new row is added to + the matrix) according to the current size of A and the element + is added in this new Bloom filter and the nr value of + this new Bloom filter is set to one. A given key is said to belong to the + DBF if the k positions are set to one in one of the matrix rows. +
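+ A sketch of the row-growing behaviour described above (an editor's addition);
+ the sizes and the nr threshold are illustrative:
+
+     DynamicBloomFilter dbf = new DynamicBloomFilter(1024, 5, Hash.MURMUR_HASH,
+                                                     100 /* nr keys per row */);
+     for (int i = 0; i < 1000; i++) {
+       dbf.add(new Key(("key-" + i).getBytes())); // new rows are appended as rows fill
+     }
+     dbf.membershipTest(new Key("key-42".getBytes())); // true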

+ Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + + @see Theory and Network Applications of Dynamic Bloom Filters]]> + + + + + + + + + + + this filter. + @param nbHash The number of hash functions to consider. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + + this filter. + @param key The key to add.]]> + + + + + + this filter. + @param key The key to test. + @return boolean True if the specified key belongs to this filter. + False otherwise.]]> + + + + + + this filter and a specified filter. +

+ Invariant: The result is assigned to this filter. + @param filter The filter to AND with.]]> + + + + + + this filter and a specified filter. +

+ Invariant: The result is assigned to this filter. + @param filter The filter to OR with.]]> + + + + + + this filter and a specified filter. +

+ Invariant: The result is assigned to this filter. + @param filter The filter to XOR with.]]> + + + + + this filter. +

+ The result is assigned to this filter.]]> + + + + + + this filter. + @param keys The list of keys.]]> + + + + + + this filter. + @param keys The collection of keys.]]> + + + + + + this filter. + @param keys The array of keys.]]> + + + + + + + + + + + + + this filter.]]> + + + + + + + + + + + + + + + + + + + + A filter is a data structure which aims at offering a lossy summary of a set A. The + key idea is to map entries of A (also called keys) into several positions + in a vector through the use of several hash functions. +

+ Typically, a filter will be implemented as a Bloom filter (or a Bloom filter extension). +

+ It must be extended in order to define the real behavior. + + @see Key The general behavior of a key + @see HashFunction A hash function]]> + + + + + + + + + Builds a hash function that must obey to a given maximum number of returned values and a highest value. + @param maxValue The maximum highest returned value. + @param nbHash The number of resulting hashed values. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + this hash function. A NOOP]]> + + + + + + + + + + + + + + + + + + + + + + + + + Builds a key with a default weight. + @param value The byte value of this key.]]> + + + + + + Builds a key with a specified weight. + @param value The value of this key. + @param weight The weight associated to this key.]]> + + + + + + + + + + + + this key.]]> + + + + + this key.]]> + + + + + + this key with a specified value. + @param weight The increment.]]> + + + + + this key by one.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The idea is to randomly select a bit to reset.]]> + + + + + + The idea is to select the bit to reset that will generate the minimum + number of false negative.]]> + + + + + + The idea is to select the bit to reset that will remove the maximum number + of false positive.]]> + + + + + + The idea is to select the bit to reset that will, at the same time, remove + the maximum number of false positve while minimizing the amount of false + negative generated.]]> + + + + + Originally created by + European Commission One-Lab Project 034819.]]> + + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this retouched Bloom filter. +

+ Invariant: if the false positive is null, nothing happens. + @param key The false positive key to add.]]> + + + + + + this retouched Bloom filter. + @param coll The collection of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The list of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The array of false positive.]]> + + + + + + + this retouched Bloom filter. + @param scheme The selective clearing scheme to apply.]]> + + + + + + + + + + + + retouched Bloom filter, as defined in the CoNEXT 2006 paper. +

+ It allows the removal of selected false positives at the cost of introducing + random false negatives, and with the benefit of eliminating some random false + positives at the same time. + +

+ Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + @see RemoveScheme The different selective clearing algorithms + + @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + length, and + the provided seed value + @param bytes input bytes + @param length length of the valid bytes to consider + @param initval seed value + @return hash value]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The best hash table sizes are powers of 2. There is no need to do mod + a prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask. + For example, if you need only 10 bits, do + h = (h & hashmask(10)); + In which case, the hash table should have hashsize(10) elements. + +

If you are hashing n strings byte[][] k, do it like this: + for (int i = 0, h = 0; i < n; ++i) h = hash( k[i], h); + +

By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this + code any way you wish, private, educational, or commercial. It's free. + +

Use for hash table lookup, or anything where one collision in 2^^32 is + acceptable. Do NOT use for cryptographic purposes.]]> + + + + + + + + + + + lookup3.c, by Bob Jenkins, May 2006, Public Domain. + + You can use this free for any purpose. It's in the public domain. + It has no warranty. + + + @see lookup3.c + @see Hash Functions (and how this + function compares to others such as CRC, MD?, etc + @see Has update on the + Dr. Dobbs Article]]> + + + + + + + + + + + + + + + + The C version of MurmurHash 2.0 found at that site was ported + to Java by Andrzej Bialecki (ab at getopt org).

]]> +
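+ A sketch (not from the original text) of the incremental hashing idiom shown
+ above, using the hash(byte[], int, int) form in which the previous result
+ seeds the next call; the keys array is an assumption:
+
+     Hash hasher = JenkinsHash.getInstance();
+     int h = 0;
+     for (byte[] key : keys) {
+       h = hasher.hash(key, key.length, h); // chain the previous hash as the seed
+     }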
+
+ +
+ + diff --git a/aarch64/share/hadoop/common/jdiff/hadoop-core_0.21.0.xml b/aarch64/share/hadoop/common/jdiff/hadoop-core_0.21.0.xml new file mode 100644 index 0000000..b88dfab --- /dev/null +++ b/aarch64/share/hadoop/common/jdiff/hadoop-core_0.21.0.xml @@ -0,0 +1,25944 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + UnsupportedOperationException + @param key + @param newKeys + @param customMessage]]> + + + + + + + UnsupportedOperationException + + @param key Key that is to be deprecated + @param newKeys list of keys that take up the values of deprecated key]]> + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param in InputStream to deserialize the object from.]]> + + + + + + + + + + + name property, null if + no such property exists. If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + name property, without doing + variable expansion.If the key is + deprecated, it returns the value of the first key which replaces + the deprecated key and is not null. + + @param name the property name. + @return the value of the name property or + its replacing property and null if no such property exists.]]> + + + + + + + value of the name property. If + name is deprecated, it sets the value to the keys + that replace the deprecated key. + + @param name property name. + @param value property value.]]> + + + + + + + + + + + + + + name. If the key is deprecated, + it returns the value of the first key which replaces the deprecated key + and is not null. + If no such property exists, + then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, or if the specified value is not a valid + int, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as an int, + or defaultValue.]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property is specified, or if the specified value is not a valid + long, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. 
+ If no such property is specified, or if the specified value is not a valid + float, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a float, + or defaultValue.]]> + + + + + + + name property to a float. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + + name property to the given type. This + is equivalent to set(<name>, value.toString()). + @param name property name + @param value new value]]> + + + + + + + + + + + + + + name property as a Pattern. + If no such property is specified, or if the specified value is not a valid + Pattern, then DefaultValue is returned. + + @param name property name + @param defaultValue default value + @return property value as a compiled Pattern, or defaultValue]]> + + + + + + + Pattern. + If the pattern is passed as null, sets the empty pattern which results in + further calls to getPattern(...) returning the default value. + + @param name property name + @param pattern new value]]> + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

+ This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + name property as + a collection of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then empty Collection is returned. + + @param name property name. + @return property value as a collection of Strings, or empty Collection]]> + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then an empty array is returned. + + @param name property name. + @return property value as an array of trimmed Strings, + or empty array.]]> + + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of trimmed Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + name property as a List + of objects implementing the interface specified by xface. + + An exception is thrown if any of the classes does not exist, or if it does + not implement the named interface. + + @param name the property name. + @param xface the interface implemented by the classes named by + name. + @return a List of objects implementing xface.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. 
+ + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + Resources + +

Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

  1. core-default.xml: Read-only defaults for hadoop.
  2. core-site.xml: Site-specific configuration for a given hadoop installation.
+ Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

Final Parameters

+ +

Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

+  <property>
+    <name>dfs.client.buffer.dir</name>
+    <value>/tmp/hadoop/dfs/client</value>
+    <final>true</final>
+  </property>
+ + Administrators typically define parameters as final in + core-site.xml for values that user applications may not alter. + +

Variable Expansion

+ +

Value strings are first processed for variable expansion. The + available properties are:

  1. Other properties defined in this Configuration; and, if a name is undefined here,
  2. Properties in {@link System#getProperties()}.
+ +

For example, if a configuration resource contains the following property + definitions: +

+  <property>
+    <name>basedir</name>
+    <value>/user/${user.name}</value>
+  </property>
+  
+  <property>
+    <name>tempdir</name>
+    <value>${basedir}/tmp</value>
+  </property>
+ + When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name.]]> +
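+ A minimal sketch of the resource loading and variable expansion described
+ above (an editor's addition); the extra resource name is hypothetical:
+
+     Configuration conf = new Configuration();      // loads core-default.xml, core-site.xml
+     conf.addResource("my-site.xml");                // hypothetical additional resource
+     String tmp = conf.get("tempdir");               // "${basedir}/tmp" expanded as above
+     int bufSize = conf.getInt("io.file.buffer.size", 4096); // default if unset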
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + uri has syntax error]]> + + + + + + + + + + + + + + + + + + + + + + default port;]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND) + + and pass it to {@link org.apache.hadoop.fs.FileSystem #create(Path f, FsPermission permission, + EnumSet flag, int bufferSize, short replication, long blockSize, + Progressable progress)}. + +

+ Combining {@link #OVERWRITE} with either {@link #CREATE} + or {@link #APPEND} does the same as using only + {@link #OVERWRITE}.
+ Combining {@link #CREATE} with {@link #APPEND} has the following semantics + (see the sketch after this list): +

    +
  1. create the file if it does not exist; +
  2. append the file if it already exists. +
]]> +
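+ A hypothetical sketch of the CREATE|APPEND combination described above,
+ passed to a FileContext-style create together with a couple of the create
+ options listed further below; fc and the path are assumptions:
+
+     FSDataOutputStream out = fc.create(new Path("/tmp/log"),
+         EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND), // create if missing, else append
+         Options.CreateOpts.createParent(),                // create missing parent dirs
+         Options.CreateOpts.bufferSize(4096));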
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + defaultFsUri is not supported]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Progress - to report progress on the operation - default null +
  • Permission - umask is applied against permission: default is + FsPermissions:getDefault() + +
  • CreateParent - create missing parent path; default is not + to create parents +
  • The defaults for the following are SS defaults of the file + server implementing the target path. Not all parameters make sense + for all kinds of file system - e.g. localFS ignores Blocksize, + replication, checksum +
      +
    • BufferSize - buffersize used in FSDataOutputStream +
    • Blocksize - block size for file blocks +
    • ReplicationFactor - replication for blocks +
    • BytesPerChecksum - bytes per checksum +
    + + + @return {@link FSDataOutputStream} for created file + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file f already exists + @throws FileNotFoundException If parent of f does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of f is not a + directory. + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is not valid]]> + + + + + + + + + + + + + + dir
    already + exists + @throws FileNotFoundException If parent of dir does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of dir is not a + directory + @throws UnsupportedFileSystemException If file system for dir + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path dir is not valid]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is invalid]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory. +
  • Fails if src is a directory and dst is a file. +
  • Fails if the parent of dst does not exist or is a file. + +

    + If OVERWRITE option is not passed as an argument, rename fails if the dst + already exists. +

    + If OVERWRITE option is passed as an argument, rename overwrites the dst if + it is a file or an empty directory. Rename fails if dst is a non-empty + directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for details +
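+ A hypothetical fragment of the overwrite-on-rename behaviour described above;
+ fc and both paths are assumptions:
+
+     fc.rename(new Path("/data/staging/part-0"),
+               new Path("/data/final/part-0"),
+               Options.Rename.OVERWRITE); // replaces an existing file or empty directory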

    + + @param src path to be renamed + @param dst new path after rename + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If dst already exists and + options has {@link Rename#OVERWRITE} option + false. + @throws FileNotFoundException If src does not exist + @throws ParentNotDirectoryException If parent of dst is not a + directory + @throws UnsupportedFileSystemException If file system for src + and dst is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws HadoopIllegalArgumentException If username or + groupname is invalid.]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + 
@throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is invalid]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + Given a path referring to a symlink of form: + + <---X---> + fs://host/A/B/link + <-----Y-----> + + In this path X is the scheme and authority that identify the file system, + and Y is the path leading up to the final path component "link". If Y is + a symlink itself then let Y' be the target of Y and X' be the scheme and + authority of Y'. Symlink targets may: + + 1. Fully qualified URIs + + fs://hostX/A/B/file Resolved according to the target file system. + + 2. Partially qualified URIs (eg scheme but no host) + + fs:///A/B/file Resolved according to the target file sytem. Eg resolving + a symlink to hdfs:///A results in an exception because + HDFS URIs must be fully qualified, while a symlink to + file:///A will not since Hadoop's local file systems + require partially qualified URIs. + + 3. Relative paths + + path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path + is "../B/file" then [Y'][path] is hdfs://host/B/file + + 4. Absolute paths + + path Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path + is "/file" then [X][path] is hdfs://host/file + + + @param target the target of the symbolic link + @param link the path to be created that points to target + @param createParent if true then missing parent dirs are created if + false then parent must exist + + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file linkcode> already exists + @throws FileNotFoundException If target does not exist + @throws ParentNotDirectoryException If parent of link is not a + directory. 
+ @throws UnsupportedFileSystemException If file system for + target or link is not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + + + + + + *** Path Names *** +

    + + The Hadoop file system supports a URI name space and URI names. + It offers a forest of file systems that can be referenced using fully + qualified URIs. + Two common Hadoop file systems implementations are +

      +
    • the local file system: file:///path +
    • the hdfs file system hdfs://nnAddress:nnPort/path +
    + + While URI names are very flexible, they require knowing the name or address + of the server. For convenience one often wants to access the default system + in one's environment without knowing its name/address. This has an + additional benefit: it allows one to change one's default fs + (e.g. an admin moves an application from cluster1 to cluster2). +

    + + To facilitate this, Hadoop supports a notion of a default file system. + The user can set his default file system, although this is + typically set up for you in your environment via your default config. + A default file system implies a default scheme and authority; slash-relative + names (such as /for/bar) are resolved relative to that default FS. + Similarly a user can also have working-directory-relative names (i.e. names + not starting with a slash). While the working directory is generally in the + same default FS, the wd can be in a different FS. +

    + Hence Hadoop path names can be one of: +

      +
    • fully qualified URI: scheme://authority/path +
    • slash relative names: /path relative to the default file system +
    • wd-relative names: path relative to the working dir +
    + Relative paths with scheme (scheme:foo/bar) are illegal. + +

    + ****The Role of the FileContext and configuration defaults**** +

    + The FileContext provides file namespace context for resolving file names; + it also contains the umask for permissions. In that sense it is like the + per-process file-related state in a Unix system. + These two properties +

      +
    • default file system (i.e. your slash) +
    • umask +
    + in general, are obtained from the default configuration file + in your environment (@see {@link Configuration}). + + No other configuration parameters are obtained from the default config as + far as the file context layer is concerned. All file system instances + (i.e. deployments of file systems) have default properties; we call these + server side (SS) defaults. Operations like create allow one to select many + properties: either pass them in as explicit parameters or use + the SS properties. +

    + The file system related SS defaults are +

      +
    • the home directory (default is "/user/userName") +
    • the initial wd (only for local fs) +
    • replication factor +
    • block size +
    • buffer size +
    • bytesPerChecksum (if used). +
    + +

    + *** Usage Model for the FileContext class *** +

    + Example 1: use the default config read from the $HADOOP_CONFIG/core.xml. + Unspecified values come from core-defaults.xml in the release jar. +

      +
    • myFContext = FileContext.getFileContext(); // uses the default config + // which has your default FS +
    • myFContext.create(path, ...); +
    • myFContext.setWorkingDir(path) +
    • myFContext.open (path, ...); +
    + Example 2: Get a FileContext with a specific URI as the default FS +
      +
    • myFContext = FileContext.getFileContext(URI) +
    • myFContext.create(path, ...); + ... +
    + Example 3: FileContext with local file system as the default +
      +
    • myFContext = FileContext.getLocalFSFileContext() +
    • myFContext.create(path, ...); +
    • ... +
    + Example 4: Use a specific config, ignoring $HADOOP_CONFIG + Generally you should not need to use a config unless you are doing +
      +
    • configX = someConfigSomeOnePassedToYou. +
    • myFContext = getFileContext(configX); // configX is not changed, + // is passed down +
    • myFContext.create(path, ...); +
    • ... +
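+ Putting Examples 1 and 2 above into a compilable fragment (an editor's
+ sketch; the working directory and paths are illustrative):
+
+     FileContext fc = FileContext.getFileContext();       // default FS from the config
+     fc.setWorkingDirectory(new Path("/user/alice"));     // hypothetical working dir
+     FSDataOutputStream out = fc.create(new Path("reports/out.txt"),
+         EnumSet.of(CreateFlag.CREATE));                  // wd-relative name
+     FSDataInputStream in = fc.open(new Path("/user/alice/reports/out.txt"));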
    ]]> + + + + + + + + + + + + + + path could + not be resolved + @throws IOException an I/O error occured]]> + + + + + + + + + + + + + + + + + + + + + + + + + + f is + not supported + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for + f is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for + pathPattern is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + files does not + exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

     ?              Matches any single character.

     *              Matches zero or more characters.

     [abc]          Matches a single character from character set {a,b,c}.

     [a-b]          Matches a single character from the character range
                    {a...b}. Note: character a must be lexicographically
                    less than or equal to character b.

     [^a]           Matches a single char that is not from character set
                    or range {a}. Note that the ^ character must occur
                    immediately to the right of the opening bracket.

     \c             Removes (escapes) any special meaning of character c.

     {ab,cd}        Matches a string from the string set {ab, cd}

     {ab,c{de,fh}}  Matches a string from string set {ab, cde, cfh}
    + + @param pathPattern a regular expression specifying a path pattern + + @return an array of paths that match the path pattern + + @throws AccessControlException If access is denied + @throws UnsupportedFileSystemException If file system for + pathPattern is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]>
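+ A hypothetical call using the pattern syntax above (the path is
+ illustrative):
+
+     FileStatus[] logs = fc.util().globStatus(
+         new Path("/logs/2014-02-[0-9][0-9]/*.log"));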
    +
    + + + + + + + + pathPattern is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + dst already exists + @throws FileNotFoundException If src does not exist + @throws ParentNotDirectoryException If parent of dst is not + a directory + @throws UnsupportedFileSystemException If file system for + src or dst is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path dst is invalid]]> + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method.]]> + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method. + This always returns a new FileSystem object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory. +
  • Fails if src is a directory and dst is a file. +
  • Fails if the parent of dst does not exist or is a file. + +

    + If OVERWRITE option is not passed as an argument, rename fails + if the dst already exists. +

    + If OVERWRITE option is passed as an argument, rename overwrites + the dst if it is a file or an empty directory. Rename fails if dst is + a non-empty directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for + details. This default implementation is non-atomic. +

    + This method is deprecated since it is a temporary method added to + support the transition from FileSystem to FileContext for user + applications. + + @param src path to be renamed + @param dst new path after rename + @throws IOException on failure]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    +
    +
    +

    +

    ? +
    Matches any single character. + +

    +

    * +
    Matches zero or more characters. + +

    +

    [abc] +
    Matches a single character from character set + {a,b,c}. + +

    +

    [a-b] +
    Matches a single character from the character range + {a...b}. Note that character a must be + lexicographically less than or equal to character b. + +

    +

    [^a] +
    Matches a single character that is not from character set or range + {a}. Note that the ^ character must occur + immediately to the right of the opening bracket. + +

    +

    \c +
    Removes (escapes) any special meaning of character c. + +

    +

    {ab,cd} +
    Matches a string from the string set {ab, cd} + +

    +

    {ab,c{de,fh}} +
    Matches a string from the string set {ab, cde, cfh} + +
    +
    +
    + + @param pathPattern a regular expression specifying a pth pattern + + @return an array of paths that match the path pattern + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object. The + Hadoop DFS is a multi-machine system that appears as a single + disk. It's useful because of its fault tolerance and potentially + very large capacity. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + path is invalid]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trash feature. Files are moved to a user's trash + directory, a subdirectory of their home directory named ".Trash". Files are + initially moved to a current sub-directory of the trash directory. 
+ Within that sub-directory their original path is preserved. Periodically + one may checkpoint the current trash and remove older checkpoints. (This + design permits trash management without enumeration of the full trash + content, without date support in the filesystem, and without clock + synchronization.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A client for the Kosmos filesystem (KFS) + +

    Introduction

    + +This page describes how to use the Kosmos Filesystem +( KFS ) as a backing +store with Hadoop. This page assumes that you have downloaded the +KFS software and installed the necessary binaries as outlined in the KFS +documentation. + +

    Steps

    + +
      +
    • In the Hadoop conf directory edit core-site.xml, + add the following: +
      +<property>
      +  <name>fs.kfs.impl</name>
      +  <value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
      +  <description>The FileSystem for kfs: uris.</description>
      +</property>
      +            
      + +
    • In the Hadoop conf directory edit core-site.xml, + adding the following (with appropriate values for + <server> and <port>): +
      +<property>
      +  <name>fs.default.name</name>
      +  <value>kfs://<server:port></value> 
      +</property>
      +
      +<property>
      +  <name>fs.kfs.metaServerHost</name>
      +  <value><server></value>
      +  <description>The location of the KFS meta server.</description>
      +</property>
      +
      +<property>
      +  <name>fs.kfs.metaServerPort</name>
      +  <value><port></value>
      +  <description>The location of the meta server's port.</description>
      +</property>
      +
      +
      +
    • + +
    • Copy KFS's kfs-0.1.jar to Hadoop's lib directory. This step + enables Hadoop to load the KFS-specific modules. Note + that kfs-0.1.jar was built when you compiled the KFS source + code. This jar file contains code that calls KFS's client + library code via JNI; the native code is in KFS's + libkfsClient.so library. +
    • + +
    • When the Hadoop map/reduce trackers start up, those +processes (on local as well as remote nodes) will need to load +KFS's libkfsClient.so library. To simplify this process, it is advisable to +store libkfsClient.so in an NFS-accessible directory (similar to where +Hadoop binaries/scripts are stored); then modify Hadoop's +conf/hadoop-env.sh, adding the following line and providing a suitable +value for <path>:
      +export LD_LIBRARY_PATH=<path>
      +
      + + +
    • Start only the map/reduce trackers +
      + example: execute Hadoop's bin/start-mapred.sh
    • +
    +
    + +Once the map/reduce job trackers start up, all file I/O is done to KFS.]]> +
    +
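    As a quick sanity check after the steps above, a hypothetical client can resolve a kfs: URI through the standard FileSystem API; the meta server host and port below are placeholders for the <server> and <port> values configured earlier:

      import java.net.URI;

      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.fs.FileStatus;
      import org.apache.hadoop.fs.FileSystem;
      import org.apache.hadoop.fs.Path;

      public class KfsSmokeTest {
        public static void main(String[] args) throws Exception {
          // Picks up conf/core-site.xml, including the fs.kfs.* properties set above.
          Configuration conf = new Configuration();
          // Hypothetical meta server host:port; substitute your own <server>:<port>.
          FileSystem fs = FileSystem.get(URI.create("kfs://metaserver.example.com:20000/"), conf);
          for (FileStatus status : fs.listStatus(new Path("/"))) {
            System.out.println(status.getPath());
          }
        }
      }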
    + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + mode is invalid]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a tool for migrating data from an older to a newer version + of an S3 filesystem. +

    +

    + All files in the filesystem are migrated by re-writing the block metadata + - no datafiles are touched. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A block-based {@link FileSystem} backed by + Amazon S3. +

    + @see NativeS3FileSystem]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + A distributed, block-based implementation of {@link +org.apache.hadoop.fs.FileSystem} that uses Amazon S3 +as a backing store.

    + +

    +Files are stored in S3 as blocks (represented by +{@link org.apache.hadoop.fs.s3.Block}), which have an ID and a length. +Block metadata is stored in S3 as a small record (represented by +{@link org.apache.hadoop.fs.s3.INode}) using the URL-encoded +path string as a key. Inodes record the file type (regular file or directory) and the list of blocks. +This design makes it easy to seek to any given position in a file by reading the inode data to compute +which block to access, then using S3's support for +HTTP Range headers +to start streaming from the correct position. +Renames are also efficient since only the inode is moved (by a DELETE followed by a PUT since +S3 does not support renames). +

    +

    +For a single file /dir1/file1 which takes two blocks of storage, the file structure in S3 +would be something like this: +

    +
    +/
    +/dir1
    +/dir1/file1
    +block-6415776850131549260
    +block-3026438247347758425
    +
    +

    +Inodes start with a leading /, while blocks are prefixed with block-. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If f is a file, this method will make a single call to S3. + If f is a directory, this method will make a maximum of + (n / 1000) + 2 calls to S3, where n is the total number of + files and directories contained directly in f. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} for reading and writing files stored on + Amazon S3. + Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation + stores files on S3 in their + native form so they can be read by other S3 tools. + + A note about directories. S3 of course has no "native" support for them. + The idiom we choose then is: for any directory created by this class, + we use an empty object "#{dirpath}_$folder$" as a marker. + Further, to interoperate with other S3 tools, we also accept the following: + - an object "#{dirpath}/' denoting a directory marker + - if there exists any objects with the prefix "#{dirpath}/", then the + directory is said to exist + - if both a file with the name of a directory and a marker for that + directory exists, then the *file masks the directory*, and the directory + is never returned. +

    + @see org.apache.hadoop.fs.s3.S3FileSystem]]> +
    +
    + + + +A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem} for reading and writing files on +Amazon S3. +Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem}, which is block-based, +this implementation stores +files on S3 in their native form for interoperability with other S3 tools. +

    ]]> +
    +
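    A minimal, illustrative sketch of reaching this implementation through the generic FileSystem API via the s3n: scheme (the bucket name, credential values and class name are placeholders; credentials may equally be set in core-site.xml):

      import java.net.URI;

      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.fs.FileStatus;
      import org.apache.hadoop.fs.FileSystem;
      import org.apache.hadoop.fs.Path;

      public class NativeS3Example {
        public static void main(String[] args) throws Exception {
          Configuration conf = new Configuration();
          // Placeholder credentials; never hard-code real keys.
          conf.set("fs.s3n.awsAccessKeyId", "YOUR_ACCESS_KEY");
          conf.set("fs.s3n.awsSecretAccessKey", "YOUR_SECRET_KEY");
          FileSystem fs = FileSystem.get(URI.create("s3n://example-bucket/"), conf);
          for (FileStatus status : fs.listStatus(new Path("/"))) {
            System.out.println(status.getPath());
          }
        }
      }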
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nth value.]]> + + + + + + + + + + + + + + + + + + + + + nth value in the file.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value argument is null or + its size is zero, the elementType argument must not be null. If + the argument value's size is bigger than zero, the argument + elementType is not be used. + + @param value + @param elementType]]> + + + + + value should not be null + or empty. + + @param value]]> + + + + + + + + + + + + + + value and elementType. If the value argument + is null or its size is zero, the elementType argument must not be + null. If the argument value's size is bigger than zero, the + argument elementType is not be used. 
+ + @param value + @param elementType]]> + + + + + + + + + + + + + + + + + + + + + + + + + o is an EnumSetWritable with the same value, + or both are null.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

    + Compared with ObjectWritable, this class is much more efficient, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
    + 2. Implement the abstract method getTypes(), defining + the classes which will be wrapped in GenericObject in your application. + Note that the classes defined in the getTypes() method must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link IOException} or + null pointers. Must only be used for cleanup in exception handlers. + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key and + val. Returns true if such a pair exists and false when at + the end of the map]]> + + + + + + + + + + + + + + + + key or if it does not exist, at the first entry + after the named key. + +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof.]]> + + + + + + + + + key does not exist, return + the first entry that falls just before the key. Otherwise, + return the record that sorts just after. + @return - the key that was the closest match or null if eof.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + className by first finding + it in the specified conf. If the specified conf is null, + try load it directly.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link Writer}, {@link Reader} and + {@link Sorter} classes for writing, reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
    1. + Writer : Uncompressed records. +
    2. + RecordCompressWriter : Record-compressed files, only compress + values. +
    3. + BlockCompressWriter : Block-compressed files, both keys & + values are collected in 'blocks' + separately and compressed. The size of + the 'block' is configurable. +
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

    The recommended way is to use the static createWriter methods + provided by the SequenceFile to choose the preferred format.

    + +

    The {@link Reader} acts as the bridge and can read any of the above + SequenceFile formats.

    + +
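    A minimal sketch of the createWriter/Reader usage recommended above (the output path, record values and class name are illustrative):

      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.fs.Path;
      import org.apache.hadoop.io.IntWritable;
      import org.apache.hadoop.io.SequenceFile;
      import org.apache.hadoop.io.Text;

      public class SequenceFileExample {
        public static void main(String[] args) throws Exception {
          Configuration conf = new Configuration();
          Path file = new Path("/tmp/example.seq");

          // Write a few key/value records; compression options could be added here.
          SequenceFile.Writer writer = SequenceFile.createWriter(conf,
              SequenceFile.Writer.file(file),
              SequenceFile.Writer.keyClass(IntWritable.class),
              SequenceFile.Writer.valueClass(Text.class));
          for (int i = 0; i < 3; i++) {
            writer.append(new IntWritable(i), new Text("value-" + i));
          }
          writer.close();

          // The Reader detects the format (uncompressed, record- or block-compressed) itself.
          SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file));
          IntWritable key = new IntWritable();
          Text value = new Text();
          while (reader.next(key, value)) {
            System.out.println(key + "\t" + value);
          }
          reader.close();
        }
      }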

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
    • + version - 3 bytes of magic header SEQ, followed by 1 byte of actual + version number (e.g. SEQ4 or SEQ6) +
    • +
    • + keyClassName -key class +
    • +
    • + valueClassName - value class +
    • +
    • + compression - A boolean which specifies if compression is turned on for + keys/values in this file. +
    • +
    • + blockCompression - A boolean which specifies if block-compression is + turned on for keys/values in this file. +
    • +
    • + compression codec - CompressionCodec class which is used for + compression of keys and/or values (if compression is + enabled). +
    • +
    • + metadata - {@link Metadata} for this file. +
    • +
    • + sync - A sync marker to denote end of the header. +
    • +
    + +
    Uncompressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Value
      • +
      +
    • +
    • + A sync-marker every few hundred bytes or so. +
    • +
    + +
    Record-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Compressed Value
      • +
      +
    • +
    • + A sync-marker every few hundred bytes or so. +
    • +
    + +
    Block-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record Block +
        +
      • Compressed key-lengths block-size
      • +
      • Compressed key-lengths block
      • +
      • Compressed keys block-size
      • +
      • Compressed keys block
      • +
      • Compressed value-lengths block-size
      • +
      • Compressed value-lengths block
      • +
      • Compressed values block-size
      • +
      • Compressed values block
      • +
      +
    • +
    • + A sync-marker every few hundred bytes or so. +
    • +
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = 0. Otherwise, + the length is not available. + @return The opened stream. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key, skipping its + value. True if another entry exists, and false at end of file.]]> + + + + + + + + key and + val. Returns true if such a pair exists and false when at + end of file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The position passed must be a position returned by {@link + SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + position, use {@link SequenceFile.Reader#sync(long)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SegmentDescriptor + @param segments the list of SegmentDescriptors + @param tmpDir the directory to write temporary files into + @return RawKeyValueIterator + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For best performance, applications should make sure that the {@link + Writable#readFields(DataInput)} implementation of their keys is + very efficient. In particular, it should avoid allocating memory.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This always returns a synchronized position. In other words, + immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + the key may be earlier in the file than key last written when this + method was called (e.g., with block-compression, it may be the first key + in the block that was being written when this method was called).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key. Returns + true if such a key exists and false when at the end of the set.]]> + + + + + + + key. + Returns key, or null if no match exists.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instatiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. 
The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. + @return byte position of the first occurence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

    + + @param in DataInput to deserialize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +

    +     public class MyWritable implements Writable {
    +       // Some data     
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 

    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Example:

    +

    +     public class MyWritableComparable implements
    +         WritableComparable<MyWritableComparable> {
    +
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable other) {
    +         int thisValue = this.counter;
    +         int thatValue = other.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
    +       }
    +     }
    + 

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implemenation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @param conf the Configuration object which contains confs for creating or reinit the compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. 
+ @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • "none" - No compression. +
  • "lzo" - LZO compression. +
  • "gz" - GZIP compression. + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Block Compression. +
  • Named meta data blocks. +
  • Sorted or unsorted keys. +
  • Seek by key or by file offset. + + The memory footprint of a TFile includes the following: +
      +
    • Some constant overhead of reading or writing a compressed block. +
        +
      • Each compressed block requires one compression/decompression codec for + I/O. +
      • Temporary space to buffer the key. +
      • Temporary space to buffer the value (for TFile.Writer only). Values are + chunk encoded, so that we buffer at most one chunk of user data. By default, + the chunk buffer is 1MB. Reading chunked value does not require additional + memory. +
      +
    • TFile index, which is proportional to the total number of Data Blocks. + The total amount of memory needed to hold the index can be estimated as + (56+AvgKeySize)*NumBlocks. +
    • MetaBlock index, which is proportional to the total number of Meta + Blocks.The total amount of memory needed to hold the index for Meta Blocks + can be estimated as (40+AvgMetaBlockName)*NumMetaBlock. +
    +

    + The behavior of TFile can be customized by the following variables through + Configuration: +

      +
    • tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default + to 1MB. Values shorter than the chunk size are guaranteed to have a + known value length at read time (see + {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}). +
    • tfile.fs.output.buffer.size: Buffer size used for + FSDataOutputStream. Integer (in bytes). Default to 256KB. +
    • tfile.fs.input.buffer.size: Buffer size used for + FSDataInputStream. Integer (in bytes). Default to 256KB. +
    +
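    A minimal sketch of setting the variables above programmatically; the values shown are simply the documented defaults:

      import org.apache.hadoop.conf.Configuration;

      public class TFileTuning {
        public static Configuration tuned() {
          Configuration conf = new Configuration();
          conf.setInt("tfile.io.chunk.size", 1024 * 1024);        // value chunk size, default 1MB
          conf.setInt("tfile.fs.output.buffer.size", 256 * 1024); // FSDataOutputStream buffer, default 256KB
          conf.setInt("tfile.fs.input.buffer.size", 256 * 1024);  // FSDataInputStream buffer, default 256KB
          return conf;
        }
      }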

    + Suggestions on performance optimization. +

      +
    • Minimum block size. We recommend a minimum block size between + 256KB and 1MB for general usage. A larger block size is preferred if files are + primarily for sequential access; however, it leads to inefficient random + access (because there is more data to decompress). Smaller blocks are good + for random access, but require more memory to hold the block index, and may + be slower to create (because we must flush the compressor stream at the + conclusion of each data block, which leads to an FS I/O flush). Further, due + to the internal caching in the compression codec, the smallest practical block + size is around 20KB-30KB. +
    • The current implementation does not offer true multi-threading for + reading. The implementation uses FSDataInputStream seek()+read(), which is + shown to be much faster than positioned-read call in single thread mode. + However, it also means that if multiple threads attempt to access the same + TFile (using multiple scanners) simultaneously, the actual I/O is carried out + sequentially even if they access different DFS blocks. +
    • Compression codec. Use "none" if the data is not very compressable (by + compressable, I mean a compression ratio at least 2:1). Generally, use "lzo" + as the starting point for experimenting. "gz" overs slightly better + compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to + decompress, comparing to "lzo". +
    • File system buffering, if the underlying FSDataInputStream and + FSDataOutputStream is already adequately buffered; or if applications + reads/writes keys and values in large buffers, we can reduce the sizes of + input/output buffering in TFile layer by setting the configuration parameters + "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size". +
    + + Some design rationale behind TFile can be found at Hadoop-3315.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + entry of the TFile. + @param endKey + End key of the scan. If null, scan up to the last entry + of the TFile. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Use {@link Scanner#atEnd()} to test whether the cursor is at the end + location of the scanner. +

    + Use {@link Scanner#advance()} to move the cursor to the next key-value + pair (or end if none exists). Use seekTo methods ( + {@link Scanner#seekTo(byte[])} or + {@link Scanner#seekTo(byte[], int, int)}) to seek to any arbitrary + location in the covered range (including backward seeking). Use + {@link Scanner#rewind()} to seek back to the beginning of the scanner. + Use {@link Scanner#seekToEnd()} to seek to the end of the scanner. +

    + Actual keys and values may be obtained through {@link Scanner.Entry} + object, which is obtained through {@link Scanner#entry()}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • Algorithmic comparator: binary comparators that are language + independent. Currently, only "memcmp" is supported. +
  • Language-specific comparator: binary comparators that can + only be constructed in a specific language. For Java, the syntax + is "jclass:", followed by the class name of the RawComparator. + Currently, we only support RawComparators that can be + constructed through the default constructor (with no + parameters). Parameterized RawComparators such as + {@link WritableComparator} or + {@link JavaSerializationComparator} may not be used directly. + One should write a wrapper class that inherits from such classes + and use its default constructor to perform proper + initialization. + + @param conf + The configuration object. + @throws IOException]]> +
  • if n in [-32, 127): encode in one byte with the actual value. + Otherwise, +
  • if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52; + byte[1]=n&0xff. Otherwise, +
  • if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 - + 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise, +
  • if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112; + byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise: +
  • if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] = + (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff; +
  • if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] = + (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff; + byte[4]=(n>>8)&0xff; byte[5]=n&0xff +
  • if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] = + (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff; + byte[4]=(n>>16)&0xff; byte[5]=(n>>8)&0xff; byte[6]=n&0xff; +
  • if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] = + (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff; + byte[4]=(n>>24)&0xff; byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff; + byte[7]=n&0xff; +
  • if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] = + (n>>54)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff; + byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; + byte[7]=(n>>8)&0xff; byte[8]=n&0xff; + + + @param out + output stream + @param n + the integer number + @throws IOException]]> + + + + + + + (int)Utils#readVLong(in). + + @param in + input stream + @return the decoded integer + @throws IOException + + @see Utils#readVLong(DataInput)]]> + + + + + + + +
  • if (FB >= -32), return (long)FB; +
  • if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff; +
  • if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 + + NB[1]&0xff; +
  • if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 + + (NB[1]&0xff)<<8 + NB[2]&0xff; +
  • if (FB in [-128, -121]), return interpret NB[FB+129] as a signed + big-endian integer. + + @param in + input stream + @return the decoded long integer. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + +This package provides a mechanism for using different serialization frameworks +in Hadoop. The property "io.serializations" defines a list of +{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create +{@link org.apache.hadoop.io.serializer.Serializer}s and +{@link org.apache.hadoop.io.serializer.Deserializer}s. +

    + +

    +To add a new serialization framework write an implementation of +{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the +"io.serializations" property. +

    ]]> +
    +
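    A minimal sketch of registering serialization frameworks through the "io.serializations" property described above (the particular set and ordering are only an example):

      import org.apache.hadoop.conf.Configuration;

      public class SerializationConfig {
        public static Configuration withSerializations() {
          Configuration conf = new Configuration();
          // Each entry names a Serialization implementation; they are consulted in order.
          conf.setStrings("io.serializations",
              "org.apache.hadoop.io.serializer.WritableSerialization",
              "org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization",
              "org.apache.hadoop.io.serializer.JavaSerialization");
          return conf;
        }
      }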
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + avro.reflect.pkgs or implement + {@link AvroReflectSerializable} interface.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +This package provides Avro serialization in Hadoop. This can be used to +serialize/deserialize Avro types in Hadoop. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for +serialization of classes generated by Avro's 'specific' compiler. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for +other classes. +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} works for +any class which is either in the package list configured via +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES} +or implements the {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable} +interface. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + +The API is abstract so that it can be implemented on top of +a variety of metrics client libraries. The choice of +client library is a configuration option, and different +modules within the same application can use +different metrics implementation libraries. +

    +Sub-packages: +

    +
    org.apache.hadoop.metrics.spi
    +
    The abstract Server Provider Interface package. Those wishing to + integrate the metrics API with a particular metrics client library should + extend this package.
    + +
    org.apache.hadoop.metrics.file
    +
    An implementation package which writes the metric data to + a file, or sends it to the standard output stream.
    + +
    org.apache.hadoop.metrics.ganglia
    +
    An implementation package which sends metric data to + Ganglia.
    +
    + +

    Introduction to the Metrics API

    + +Here is a simple example of how to use this package to report a single +metric value: +
    +    private ContextFactory contextFactory = ContextFactory.getFactory();
    +    
    +    void reportMyMetric(float myMetric) {
    +        MetricsContext myContext = contextFactory.getContext("myContext");
    +        MetricsRecord myRecord = myContext.getRecord("myRecord");
    +        myRecord.setMetric("myMetric", myMetric);
    +        myRecord.update();
    +    }
    +
    + +In this example there are three names: +
    +
    myContext
    +
    The context name will typically identify either the application, or else a + module within an application or library.
    + +
    myRecord
    +
    The record name generally identifies some entity for which a set of + metrics are to be reported. For example, you could have a record named + "cacheStats" for reporting a number of statistics relating to the usage of + some cache in your application.
    + +
    myMetric
    +
    This identifies a particular metric. For example, you might have metrics + named "cache_hits" and "cache_misses". +
    +
    + +

    Tags

    + +In some cases it is useful to have multiple records with the same name. For +example, suppose that you want to report statistics about each disk on a computer. +In this case, the record name would be something like "diskStats", but you also +need to identify the disk which is done by adding a tag to the record. +The code could look something like this: +
    +    private MetricsRecord diskStats =
    +            contextFactory.getContext("myContext").getRecord("diskStats");
    +            
    +    void reportDiskMetrics(String diskName, float diskBusy, float diskUsed) {
    +        diskStats.setTag("diskName", diskName);
    +        diskStats.setMetric("diskBusy", diskBusy);
    +        diskStats.setMetric("diskUsed", diskUsed);
    +        diskStats.update();
    +    }
    +
    + +

    Buffering and Callbacks

    + +Data is not sent immediately to the metrics system when +MetricsRecord.update() is called. Instead it is stored in an +internal table, and the contents of the table are sent periodically. +This can be important for two reasons: +
      +
    1. It means that a programmer is free to put calls to this API in an + inner loop, since updates can be very frequent without slowing down + the application significantly.
    2. Some implementations can gain efficiency by combining many metrics + into a single UDP message.
    + +The API provides a timer-based callback via the +registerUpdater() method. The benefit of this +versus using java.util.Timer is that the callbacks will be done +immediately before sending the data, making the data as current as possible. + +
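    A minimal sketch of the registerUpdater() pattern described above (the context, record and metric names are illustrative):

      import org.apache.hadoop.metrics.ContextFactory;
      import org.apache.hadoop.metrics.MetricsContext;
      import org.apache.hadoop.metrics.MetricsRecord;
      import org.apache.hadoop.metrics.Updater;

      public class QueueDepthMetrics implements Updater {
        private final MetricsRecord record;
        private volatile int queueDepth;

        public QueueDepthMetrics() throws Exception {
          MetricsContext context = ContextFactory.getFactory().getContext("myContext");
          record = context.getRecord("queueStats");
          context.registerUpdater(this); // doUpdates() runs just before each periodic send
        }

        public void setQueueDepth(int depth) { this.queueDepth = depth; }

        public void doUpdates(MetricsContext unused) {
          // Push the latest value only when the buffered data is about to be sent.
          record.setMetric("queueDepth", queueDepth);
          record.update();
        }
      }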

    Configuration

    + +It is possible to programmatically examine and modify configuration data +before creating a context, like this: +
    +    ContextFactory factory = ContextFactory.getFactory();
    +    ... examine and/or modify factory attributes ...
    +    MetricsContext context = factory.getContext("myContext");
    +
    +The factory attributes can be examined and modified using the following +ContextFactory methods: +
      +
    • Object getAttribute(String attributeName)
    • +
    • String[] getAttributeNames()
    • +
    • void setAttribute(String name, Object value)
    • +
    • void removeAttribute(attributeName)
    • +
    + +

    +ContextFactory.getFactory() initializes the factory attributes by +reading the properties file hadoop-metrics.properties if it exists +on the class path. + +

    +A factory attribute named: +

    +contextName.class
    +
    +should have as its value the fully qualified name of the class to be +instantiated by a call of the ContextFactory method +getContext(contextName). If this factory attribute is not +specified, the default is to instantiate +org.apache.hadoop.metrics.file.FileContext. + +

    +Other factory attributes are specific to a particular implementation of this +API and are documented elsewhere. For example, configuration attributes for +the file and Ganglia implementations can be found in the javadoc for +their respective packages.]]> + + + + + + + + + + + + + + + + + + + + + + + fileName attribute, + if specified. Otherwise the data will be written to standard + output.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is configured by setting ContextFactory attributes which in turn + are usually configured through a properties file. All the attributes are + prefixed by the contextName. For example, the properties file might contain: +

    + myContextName.fileName=/tmp/metrics.log
    + myContextName.period=5
    + 
    ]]> +
    + + + + +These are the implementation specific factory attributes +(See ContextFactory.getFactory()): + +
    +
    contextName.fileName
    +
    The path of the file to which metrics in context contextName + are to be appended. If this attribute is not specified, the metrics + are written to standard output by default.
    + +
    contextName.period
    +
    The period in seconds on which the metric data is written to the + file.
    + +
    ]]> +
    +
+ +Implementation of the metrics package that sends metric data to Ganglia. Programmers should not normally need to use this package directly. Instead they should use org.apache.hadoop.metrics.

    +These are the implementation specific factory attributes +(See ContextFactory.getFactory()): + +

    +
    contextName.servers
    +
    Space and/or comma separated sequence of servers to which UDP + messages should be sent.
    + +
    contextName.period
    +
    The period in seconds on which the metric data is sent to the + server(s).
    + +
    contextName.units.recordName.metricName
    +
    The units for the specified metric in the specified record.
    + +
    contextName.slope.recordName.metricName
    +
    The slope for the specified metric in the specified record.
    + +
    contextName.tmax.recordName.metricName
    +
    The tmax for the specified metric in the specified record.
    + +
    contextName.dmax.recordName.metricName
    +
    The dmax for the specified metric in the specified record.
    + +
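    Putting these attributes together, a hadoop-metrics.properties fragment that routes an example context to a Ganglia collector might look like this (the context name, host, port and period are illustrative values only):

    myContextName.class=org.apache.hadoop.metrics.ganglia.GangliaContext
    myContextName.period=10
    myContextName.servers=localhost:8649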
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + contextName.tableName. The returned map consists of + those attributes with the contextName and tableName stripped off.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class implements the internal table of metric data, and the timer + on which data is to be sent to the metrics system. Subclasses must + override the abstract emitRecord method in order to transmit + the data.

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + update + and remove().]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @return a list of InetSocketAddress objects.]]> + + + + + + + + + org.apache.hadoop.metrics.file and +org.apache.hadoop.metrics.ganglia.

    + +Plugging in an implementation involves writing a concrete subclass of +AbstractMetricsContext. The subclass should get its + configuration information using the getAttribute(attributeName) + method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = getCount(). + @param newCapacity The new capacity in bytes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Index idx = startVector(...); + while (!idx.done()) { + .... // read element of a vector + idx.incr(); + } + + + @deprecated Replaced by Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + (DEPRECATED) Hadoop record I/O contains classes and a record description language + translator for simplifying serialization and deserialization of records in a + language-neutral manner. +

    + +

    + DEPRECATED: Replaced by Avro. +

    + +

    Introduction

    + + Software systems of any significant complexity require mechanisms for data +interchange with the outside world. These interchanges typically involve the +marshaling and unmarshaling of logical units of data to and from data streams +(files, network connections, memory buffers etc.). Applications usually have +some code for serializing and deserializing the data types that they manipulate +embedded in them. The work of serialization has several features that make +automatic code generation for it worthwhile. Given a particular output encoding +(binary, XML, etc.), serialization of primitive types and simple compositions +of primitives (structs, vectors etc.) is a very mechanical task. Manually +written serialization code can be susceptible to bugs especially when records +have a large number of fields or a record definition changes between software +versions. Lastly, it can be very useful for applications written in different +programming languages to be able to share and interchange data. This can be +made a lot easier by describing the data records manipulated by these +applications in a language agnostic manner and using the descriptions to derive +implementations of serialization in multiple target languages. + +This document describes Hadoop Record I/O, a mechanism that is aimed +at +
      +
    • enabling the specification of simple serializable data types (records) +
    • enabling the generation of code in multiple target languages for +marshaling and unmarshaling such types +
    • providing target language specific support that will enable application +programmers to incorporate generated code into their applications +
    + +The goals of Hadoop Record I/O are similar to those of mechanisms such as XDR, +ASN.1, PADS and ICE. While these systems all include a DDL that enables +the specification of most record types, they differ widely in what else they +focus on. The focus in Hadoop Record I/O is on data marshaling and +multi-lingual support. We take a translator-based approach to serialization. +Hadoop users have to describe their data in a simple data description +language. The Hadoop DDL translator rcc generates code that users +can invoke in order to read/write their data from/to simple stream +abstractions. Next we list explicitly some of the goals and non-goals of +Hadoop Record I/O. + + +

    Goals

    + +
      +
    • Support for commonly used primitive types. Hadoop should include as +primitives commonly used builtin types from programming languages we intend to +support. + +
    • Support for common data compositions (including recursive compositions). +Hadoop should support widely used composite types such as structs and +vectors. + +
    • Code generation in multiple target languages. Hadoop should be capable of +generating serialization code in multiple target languages and should be +easily extensible to new target languages. The initial target languages are +C++ and Java. + +
• Support for generated target languages. Hadoop should include support in the form of headers, libraries, and packages for supported target languages that enable easy inclusion and use of generated code in applications.
    • Support for multiple output encodings. Candidates include +packed binary, comma-separated text, XML etc. + +
• Support for specifying record types in a backwards/forwards compatible manner. This will probably be in the form of support for optional fields in records. This version of the document does not include a description of the planned mechanism; we intend to include it in the next iteration.
    + +

    Non-Goals

    + +
      +
    • Serializing existing arbitrary C++ classes. +
    • Serializing complex data structures such as trees, linked lists etc. +
    • Built-in indexing schemes, compression, or check-sums. +
    • Dynamic construction of objects from an XML schema. +
    + +The remainder of this document describes the features of Hadoop record I/O +in more detail. Section 2 describes the data types supported by the system. +Section 3 lays out the DDL syntax with some examples of simple records. +Section 4 describes the process of code generation with rcc. Section 5 +describes target language mappings and support for Hadoop types. We include a +fairly complete description of C++ mappings with intent to include Java and +others in upcoming iterations of this document. The last section talks about +supported output encodings. + + +

    Data Types and Streams

    + +This section describes the primitive and composite types supported by Hadoop. +We aim to support a set of types that can be used to simply and efficiently +express a wide range of record types in different programming languages. + +

    Primitive Types

    + +For the most part, the primitive types of Hadoop map directly to primitive +types in high level programming languages. Special cases are the +ustring (a Unicode string) and buffer types, which we believe +find wide use and which are usually implemented in library code and not +available as language built-ins. Hadoop also supplies these via library code +when a target language built-in is not present and there is no widely +adopted "standard" implementation. The complete list of primitive types is: + +
      +
    • byte: An 8-bit unsigned integer. +
    • boolean: A boolean value. +
    • int: A 32-bit signed integer. +
    • long: A 64-bit signed integer. +
    • float: A single precision floating point number as described by + IEEE-754. +
    • double: A double precision floating point number as described by + IEEE-754. +
    • ustring: A string consisting of Unicode characters. +
    • buffer: An arbitrary sequence of bytes. +
    + + +

    Composite Types

+Hadoop supports a small set of composite types that enable the description of simple aggregate types and containers. A composite type is serialized by sequentially serializing its constituent elements. The supported composite types are:
      + +
• record: An aggregate type like a C-struct. This is a list of typed fields that are together considered a single unit of data. A record is serialized by sequentially serializing its constituent fields. In addition to serialization, a record has comparison operations (equality and less-than) implemented for it; these are defined as memberwise comparisons.
    • vector: A sequence of entries of the same data type, primitive +or composite. + +
    • map: An associative container mapping instances of a key type to +instances of a value type. The key and value types may themselves be primitive +or composite types. + +
    + +

    Streams

    + +Hadoop generates code for serializing and deserializing record types to +abstract streams. For each target language Hadoop defines very simple input +and output stream interfaces. Application writers can usually develop +concrete implementations of these by putting a one method wrapper around +an existing stream implementation. + + +

    DDL Syntax and Examples

    + +We now describe the syntax of the Hadoop data description language. This is +followed by a few examples of DDL usage. + +

    Hadoop DDL Syntax

    + +
    
    +recfile = *include module *record
    +include = "include" path
    +path = (relative-path / absolute-path)
    +module = "module" module-name
    +module-name = name *("." name)
    +record := "class" name "{" 1*(field) "}"
    +field := type name ";"
    +name :=  ALPHA (ALPHA / DIGIT / "_" )*
    +type := (ptype / ctype)
+ptype := ("byte" / "boolean" / "int" /
+          "long" / "float" / "double" /
+          "ustring" / "buffer")
+ctype := (("vector" "<" type ">") /
+          ("map" "<" type "," type ">")) / name
    +
    + +A DDL file describes one or more record types. It begins with zero or +more include declarations, a single mandatory module declaration +followed by zero or more class declarations. The semantics of each of +these declarations are described below: + +
      + +
    • include: An include declaration specifies a DDL file to be +referenced when generating code for types in the current DDL file. Record types +in the current compilation unit may refer to types in all included files. +File inclusion is recursive. An include does not trigger code +generation for the referenced file. + +
    • module: Every Hadoop DDL file must have a single module +declaration that follows the list of includes and precedes all record +declarations. A module declaration identifies a scope within which +the names of all types in the current file are visible. Module names are +mapped to C++ namespaces, Java packages etc. in generated code. + +
• class: Record types are specified through class declarations. A class declaration is like a Java class declaration. It specifies a named record type and a list of fields that constitute records of the type. Usage is illustrated in the following examples.
    + +

    Examples

    + +
      +
    • A simple DDL file links.jr with just one record declaration. +
      
      +module links {
      +    class Link {
      +        ustring URL;
      +        boolean isRelative;
      +        ustring anchorText;
      +    };
      +}
      +
      + +
    • A DDL file outlinks.jr which includes another +
      
      +include "links.jr"
      +
      +module outlinks {
      +    class OutLinks {
      +        ustring baseURL;
      +        vector outLinks;
      +    };
      +}
      +
      +
    + +

    Code Generation

+ +The Hadoop translator is written in Java. Invocation is done by executing a wrapper shell script named rcc. It takes a list of record description files as a mandatory argument and an optional language argument, --language or -l (the default is Java). Thus a typical invocation would look like:
    
    +$ rcc -l C++  ...
    +
    + + +

    Target Language Mappings and Support

    + +For all target languages, the unit of code generation is a record type. +For each record type, Hadoop generates code for serialization and +deserialization, record comparison and access to record members. + +

    C++

    + +Support for including Hadoop generated C++ code in applications comes in the +form of a header file recordio.hh which needs to be included in source +that uses Hadoop types and a library librecordio.a which applications need +to be linked with. The header declares the Hadoop C++ namespace which defines +appropriate types for the various primitives, the basic interfaces for +records and streams and enumerates the supported serialization encodings. +Declarations of these interfaces and a description of their semantics follow: + +
    
    +namespace hadoop {
    +
    +  enum RecFormat { kBinary, kXML, kCSV };
    +
    +  class InStream {
    +  public:
    +    virtual ssize_t read(void *buf, size_t n) = 0;
    +  };
    +
    +  class OutStream {
    +  public:
    +    virtual ssize_t write(const void *buf, size_t n) = 0;
    +  };
    +
    +  class IOError : public runtime_error {
    +  public:
    +    explicit IOError(const std::string& msg);
    +  };
    +
    +  class IArchive;
    +  class OArchive;
    +
    +  class RecordReader {
    +  public:
    +    RecordReader(InStream& in, RecFormat fmt);
    +    virtual ~RecordReader(void);
    +
    +    virtual void read(Record& rec);
    +  };
    +
    +  class RecordWriter {
    +  public:
    +    RecordWriter(OutStream& out, RecFormat fmt);
    +    virtual ~RecordWriter(void);
    +
    +    virtual void write(Record& rec);
    +  };
    +
    +
    +  class Record {
    +  public:
    +    virtual std::string type(void) const = 0;
    +    virtual std::string signature(void) const = 0;
    +  protected:
    +    virtual bool validate(void) const = 0;
    +
    +    virtual void
    +    serialize(OArchive& oa, const std::string& tag) const = 0;
    +
    +    virtual void
    +    deserialize(IArchive& ia, const std::string& tag) = 0;
    +  };
    +}
    +
    + +
      + +
    • RecFormat: An enumeration of the serialization encodings supported +by this implementation of Hadoop. + +
    • InStream: A simple abstraction for an input stream. This has a +single public read method that reads n bytes from the stream into +the buffer buf. Has the same semantics as a blocking read system +call. Returns the number of bytes read or -1 if an error occurs. + +
    • OutStream: A simple abstraction for an output stream. This has a +single write method that writes n bytes to the stream from the +buffer buf. Has the same semantics as a blocking write system +call. Returns the number of bytes written or -1 if an error occurs. + +
    • RecordReader: A RecordReader reads records one at a time from +an underlying stream in a specified record format. The reader is instantiated +with a stream and a serialization format. It has a read method that +takes an instance of a record and deserializes the record from the stream. + +
    • RecordWriter: A RecordWriter writes records one at a +time to an underlying stream in a specified record format. The writer is +instantiated with a stream and a serialization format. It has a +write method that takes an instance of a record and serializes the +record to the stream. + +
    • Record: The base class for all generated record types. This has two +public methods type and signature that return the typename and the +type signature of the record. + +
+ +Two files are generated for each record file (note: not for each record). If a record file is named "name.jr", the generated files are "name.jr.cc" and "name.jr.hh", containing serialization implementations and record type declarations respectively. For each record in the DDL file, the generated header file will contain a class definition corresponding to the record type; method definitions for the generated type will be present in the '.cc' file. The generated class will inherit from the abstract class hadoop::Record. The DDL file's module declaration determines the namespace the record belongs to. Each '.' delimited token in the module declaration results in the creation of a namespace. For instance, the declaration module docs.links results in the creation of a docs namespace and a nested docs::links namespace. In the preceding examples, the Link class is placed in the links namespace. The header file corresponding to the links.jr file will contain:
    
    +namespace links {
    +  class Link : public hadoop::Record {
    +    // ....
    +  };
    +};
    +
+ +Each field within the record will cause the generation of a private member declaration of the appropriate type in the class declaration, and one or more accessor methods. The generated class will implement the serialize and deserialize methods defined in hadoop::Record. It will also implement the inspection methods type and signature from hadoop::Record. A default constructor and virtual destructor will also be generated. Serialization code will read/write records into streams that implement the hadoop::InStream and the hadoop::OutStream interfaces. For each member of a record an accessor method is generated that returns either the member or a reference to the member. For members that are returned by value, a setter method is also generated. This is true for primitive data members of the types byte, int, long, boolean, float and double. For example, for an int field called MyField the following code is generated:
    
    +...
    +private:
    +  int32_t mMyField;
    +  ...
    +public:
    +  int32_t getMyField(void) const {
    +    return mMyField;
    +  };
    +
    +  void setMyField(int32_t m) {
    +    mMyField = m;
    +  };
    +  ...
    +
+ +For a ustring, buffer, or composite field, the generated code contains only accessors that return a reference to the field; a const and a non-const accessor are generated. For example:
    
    +...
    +private:
    +  std::string mMyBuf;
    +  ...
    +public:
    +
    +  std::string& getMyBuf() {
    +    return mMyBuf;
    +  };
    +
    +  const std::string& getMyBuf() const {
    +    return mMyBuf;
    +  };
    +  ...
    +
    + +

    Examples

    + +Suppose the inclrec.jr file contains: +
    
    +module inclrec {
    +    class RI {
    +        int      I32;
    +        double   D;
    +        ustring  S;
    +    };
    +}
    +
    + +and the testrec.jr file contains: + +
    
    +include "inclrec.jr"
    +module testrec {
    +    class R {
    +        vector VF;
    +        RI            Rec;
    +        buffer        Buf;
    +    };
    +}
    +
    + +Then the invocation of rcc such as: +
    
    +$ rcc -l c++ inclrec.jr testrec.jr
    +
    +will result in generation of four files: +inclrec.jr.{cc,hh} and testrec.jr.{cc,hh}. + +The inclrec.jr.hh will contain: + +
    
    +#ifndef _INCLREC_JR_HH_
    +#define _INCLREC_JR_HH_
    +
    +#include "recordio.hh"
    +
    +namespace inclrec {
    +  
    +  class RI : public hadoop::Record {
    +
    +  private:
    +
    +    int32_t      I32;
    +    double       D;
    +    std::string  S;
    +
    +  public:
    +
    +    RI(void);
    +    virtual ~RI(void);
    +
    +    virtual bool operator==(const RI& peer) const;
    +    virtual bool operator<(const RI& peer) const;
    +
    +    virtual int32_t getI32(void) const { return I32; }
    +    virtual void setI32(int32_t v) { I32 = v; }
    +
    +    virtual double getD(void) const { return D; }
    +    virtual void setD(double v) { D = v; }
    +
+    virtual std::string& getS(void) { return S; }
+    virtual const std::string& getS(void) const { return S; }
    +
    +    virtual std::string type(void) const;
    +    virtual std::string signature(void) const;
    +
    +  protected:
    +
    +    virtual void serialize(hadoop::OArchive& a) const;
    +    virtual void deserialize(hadoop::IArchive& a);
    +  };
    +} // end namespace inclrec
    +
    +#endif /* _INCLREC_JR_HH_ */
    +
    +
    + +The testrec.jr.hh file will contain: + + +
    
    +
    +#ifndef _TESTREC_JR_HH_
    +#define _TESTREC_JR_HH_
    +
    +#include "inclrec.jr.hh"
    +
    +namespace testrec {
    +  class R : public hadoop::Record {
    +
    +  private:
    +
    +    std::vector VF;
    +    inclrec::RI        Rec;
    +    std::string        Buf;
    +
    +  public:
    +
    +    R(void);
    +    virtual ~R(void);
    +
    +    virtual bool operator==(const R& peer) const;
    +    virtual bool operator<(const R& peer) const;
    +
+    virtual std::vector& getVF(void);
+    virtual const std::vector& getVF(void) const;
+
+    virtual std::string& getBuf(void);
+    virtual const std::string& getBuf(void) const;
+
+    virtual inclrec::RI& getRec(void);
+    virtual const inclrec::RI& getRec(void) const;
    +    
    +    virtual bool serialize(hadoop::OutArchive& a) const;
    +    virtual bool deserialize(hadoop::InArchive& a);
    +    
    +    virtual std::string type(void) const;
    +    virtual std::string signature(void) const;
    +  };
    +}; // end namespace testrec
    +#endif /* _TESTREC_JR_HH_ */
    +
    +
    + +

    Java

    + +Code generation for Java is similar to that for C++. A Java class is generated +for each record type with private members corresponding to the fields. Getters +and setters for fields are also generated. Some differences arise in the +way comparison is expressed and in the mapping of modules to packages and +classes to files. For equality testing, an equals method is generated +for each record type. As per Java requirements a hashCode method is also +generated. For comparison a compareTo method is generated for each +record type. This has the semantics as defined by the Java Comparable +interface, that is, the method returns a negative integer, zero, or a positive +integer as the invoked object is less than, equal to, or greater than the +comparison parameter. + +A .java file is generated per record type as opposed to per DDL +file as in C++. The module declaration translates to a Java +package declaration. The module name maps to an identical Java package +name. In addition to this mapping, the DDL compiler creates the appropriate +directory hierarchy for the package and places the generated .java +files in the correct directories. + +
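    As an illustration only (the exact names and signatures emitted by rcc may differ, and the serialization plumbing of the deprecated org.apache.hadoop.record runtime is omitted), the Link record from the earlier links.jr example roughly maps to a Java class of this shape:

    package links;

    // Rough, hand-written sketch of the generated shape described above.
    public class Link implements Comparable<Link> {
        private String URL;
        private boolean isRelative;
        private String anchorText;

        public String getURL() { return URL; }
        public void setURL(String url) { this.URL = url; }

        public boolean getIsRelative() { return isRelative; }
        public void setIsRelative(boolean relative) { this.isRelative = relative; }

        public String getAnchorText() { return anchorText; }
        public void setAnchorText(String text) { this.anchorText = text; }

        @Override
        public boolean equals(Object o) {
            if (!(o instanceof Link)) return false;
            Link p = (Link) o;
            return URL.equals(p.URL) && isRelative == p.isRelative
                && anchorText.equals(p.anchorText);
        }

        @Override
        public int hashCode() {
            return java.util.Objects.hash(URL, isRelative, anchorText);
        }

        @Override
        public int compareTo(Link p) {          // memberwise ordering, as for the C++ operators
            int c = URL.compareTo(p.URL);
            if (c != 0) return c;
            c = Boolean.compare(isRelative, p.isRelative);
            return c != 0 ? c : anchorText.compareTo(p.anchorText);
        }
    }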

    Mapping Summary

    + +
    
    +DDL Type        C++ Type            Java Type 
    +
    +boolean         bool                boolean
    +byte            int8_t              byte
    +int             int32_t             int
    +long            int64_t             long
    +float           float               float
    +double          double              double
    +ustring         std::string         java.lang.String
    +buffer          std::string         org.apache.hadoop.record.Buffer
    +class type      class type          class type
+vector<type>    std::vector<type>   java.util.ArrayList<type>
+map<type,type>  std::map<type,type> java.util.TreeMap<type,type>
    +
    + +

    Data encodings

    + +This section describes the format of the data encodings supported by Hadoop. +Currently, three data encodings are supported, namely binary, CSV and XML. + +

    Binary Serialization Format

    + +The binary data encoding format is fairly dense. Serialization of composite +types is simply defined as a concatenation of serializations of the constituent +elements (lengths are included in vectors and maps). + +Composite types are serialized as follows: +
      +
    • class: Sequence of serialized members. +
    • vector: The number of elements serialized as an int. Followed by a +sequence of serialized elements. +
    • map: The number of key value pairs serialized as an int. Followed +by a sequence of serialized (key,value) pairs. +
    + +Serialization of primitives is more interesting, with a zero compression +optimization for integral types and normalization to UTF-8 for strings. +Primitive types are serialized as follows: + +
      +
    • byte: Represented by 1 byte, as is. +
    • boolean: Represented by 1-byte (0 or 1) +
• int/long: Integers and longs are serialized zero compressed. Represented as 1 byte if -120 <= value < 128. Otherwise, serialized as a sequence of 2-5 bytes for ints, 2-9 bytes for longs. The first byte represents the number of trailing bytes, N, as the negative number (-120-N). For example, the number 1024 (0x400) is represented by the byte sequence 'x86 x04 x00'. This doesn't help much for 4-byte integers but does a reasonably good job with longs without bit twiddling. A worked sketch of this encoding follows this list.
    • float/double: Serialized in IEEE 754 single and double precision +format in network byte order. This is the format used by Java. +
    • ustring: Serialized as 4-byte zero compressed length followed by +data encoded as UTF-8. Strings are normalized to UTF-8 regardless of native +language representation. +
    • buffer: Serialized as a 4-byte zero compressed length followed by the +raw bytes in the buffer. +
    + + +
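    The following is a minimal sketch of the zero-compressed integer encoding described above for non-negative values (it is not the Hadoop implementation, which lives in the deprecated org.apache.hadoop.record.Utils class and also covers negative numbers and longs):

    import java.io.ByteArrayOutputStream;

    public class ZeroCompressedInt {
        static byte[] encode(int value) {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            if (value >= -120 && value < 128) {
                out.write(value);                    // small values fit in a single byte
            } else {
                int n = 4;                           // number of trailing bytes, most significant first
                while (n > 1 && (value >>> ((n - 1) * 8)) == 0) {
                    n--;
                }
                out.write(-120 - n);                 // length marker, e.g. -122 (0x86) for two bytes
                for (int i = n - 1; i >= 0; i--) {
                    out.write((value >>> (i * 8)) & 0xff);
                }
            }
            return out.toByteArray();
        }

        public static void main(String[] args) {
            for (byte b : encode(1024)) {            // prints 86 04 00, matching the example above
                System.out.printf("%02x ", b & 0xff);
            }
        }
    }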

    CSV Serialization Format

    + +The CSV serialization format has a lot more structure than the "standard" +Excel CSV format, but we believe the additional structure is useful because + +
      +
    • it makes parsing a lot easier without detracting too much from legibility +
    • the delimiters around composites make it obvious when one is reading a +sequence of Hadoop records +
+ +Serialization formats for the various types are detailed in the grammar that follows. The notable feature of the formats is the use of delimiters for indicating certain field types.
      +
    • A string field begins with a single quote ('). +
    • A buffer field begins with a sharp (#). +
    • A class, vector or map begins with 's{', 'v{' or 'm{' respectively and +ends with '}'. +
    + +The CSV format can be described by the following grammar: + +
    
    +record = primitive / struct / vector / map
    +primitive = boolean / int / long / float / double / ustring / buffer
    +
    +boolean = "T" / "F"
    +int = ["-"] 1*DIGIT
    +long = ";" ["-"] 1*DIGIT
    +float = ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
    +double = ";" ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
    +
    +ustring = "'" *(UTF8 char except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
    +
    +buffer = "#" *(BYTE except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
    +
    +struct = "s{" record *("," record) "}"
    +vector = "v{" [record *("," record)] "}"
    +map = "m{" [*(record "," record)] "}"
    +
    + +
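    As an illustration derived from this grammar (not an output captured from the Hadoop tools), a Link record from the earlier DDL example whose URL is "http://hadoop.apache.org/", whose isRelative is false and whose anchorText is "Apache Hadoop" would serialize to:

    s{'http://hadoop.apache.org/,F,'Apache Hadoop}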

    XML Serialization Format

+ +The XML serialization format is the same as that used by Apache XML-RPC (http://ws.apache.org/xmlrpc/types.html). This is an extension of the original XML-RPC format and adds some additional data types. Not all record I/O types are directly expressible in this format, and access to a DDL is required in order to convert these to valid types. All types, primitive or composite, are represented by <value> elements. The particular XML-RPC type is indicated by a nested element in the <value> element. The encoding for records is always UTF-8. Primitive types are serialized as follows:
      +
    • byte: XML tag <ex:i1>. Values: 1-byte unsigned +integers represented in US-ASCII +
    • boolean: XML tag <boolean>. Values: "0" or "1" +
    • int: XML tags <i4> or <int>. Values: 4-byte +signed integers represented in US-ASCII. +
    • long: XML tag <ex:i8>. Values: 8-byte signed integers +represented in US-ASCII. +
    • float: XML tag <ex:float>. Values: Single precision +floating point numbers represented in US-ASCII. +
    • double: XML tag <double>. Values: Double precision +floating point numbers represented in US-ASCII. +
• ustring: XML tag <string>. Values: String values represented as UTF-8. XML does not permit all Unicode characters in literal data. In particular, NULLs and control chars are not allowed. Additionally, XML processors are required to replace carriage returns with line feeds and to replace CRLF sequences with line feeds. Programming languages that we work with do not impose these restrictions on string types. To work around these restrictions, disallowed characters and CRs are percent escaped in strings. The '%' character is also percent escaped.
• buffer: XML tag <string>. Values: Arbitrary binary data. Represented as hexBinary, each byte is replaced by its 2-byte hexadecimal representation.
    + +Composite types are serialized as follows: + +
      +
    • class: XML tag <struct>. A struct is a sequence of +<member> elements. Each <member> element has a <name> +element and a <value> element. The <name> is a string that must +match /[a-zA-Z][a-zA-Z0-9_]*/. The value of the member is represented +by a <value> element. + +
• vector: XML tag <array>. An <array> contains a single <data> element. The <data> element is a sequence of <value> elements each of which represents an element of the vector.
    • map: XML tag <array>. Same as vector. + +
    + +For example: + +
    
    +class {
    +  int           MY_INT;            // value 5
+  vector<float> MY_VEC;     // values 0.1, -0.89, 2.45e4
    +  buffer        MY_BUF;            // value '\00\n\tabc%'
    +}
    +
    + +is serialized as + +
    
    +<value>
    +  <struct>
    +    <member>
    +      <name>MY_INT</name>
    +      <value><i4>5</i4></value>
    +    </member>
    +    <member>
    +      <name>MY_VEC</name>
    +      <value>
    +        <array>
    +          <data>
    +            <value><ex:float>0.1</ex:float></value>
    +            <value><ex:float>-0.89</ex:float></value>
    +            <value><ex:float>2.45e4</ex:float></value>
    +          </data>
    +        </array>
    +      </value>
    +    </member>
    +    <member>
    +      <name>MY_BUF</name>
    +      <value><string>%00\n\tabc%25</string></value>
    +    </member>
    +  </struct>
    +</value> 
    +
    ]]> +
    +
    + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + (DEPRECATED) This package contains classes needed for code generation + from the hadoop record compiler. CppGenerator and JavaGenerator + are the main entry points from the parser. There are classes + corrsponding to every primitive type and compound type + included in Hadoop record I/O syntax. +

    + +

    + DEPRECATED: Replaced by Avro. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This task takes the given record definition files and compiles them into + java or c++ + files. It is then up to the user to compile the generated files. + +

    The task requires the file or the nested fileset element to be + specified. Optional attributes are language (set the output + language, default is "java"), + destdir (name of the destination directory for generated java/c++ + code, default is ".") and failonerror (specifies error handling + behavior. default is true). +

    Usage

    +
    + <recordcc
    +       destdir="${basedir}/gensrc"
    +       language="java">
+   <fileset include="**/*.jr" />
    + </recordcc>
    + 
    + + @deprecated Replaced by Avro.]]> +
    +
    + +
    + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + (DEPRECATED) This package contains code generated by JavaCC from the + Hadoop record syntax file rcc.jj. For details about the + record file syntax please @see org.apache.hadoop.record. +

    + +

    + DEPRECATED: Replaced by Avro. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take an insignificant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occured and time-out the operation.

    ]]> +
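    A minimal sketch (not taken from the Hadoop sources) of a long-running operation that reports liveness through the supplied Progressable; the class and the per-block work are made-up names:

    import org.apache.hadoop.util.Progressable;

    public class SlowCopier {
        public void copy(byte[][] blocks, Progressable progress) {
            for (byte[] block : blocks) {
                process(block);          // hypothetical expensive per-block work
                progress.progress();     // tell the framework the operation is still alive
            }
        }

        private void process(byte[] block) {
            // placeholder for real work
        }
    }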
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShellCommandExecutorshould be used in cases where the output + of the command needs no explicit parsing and where the command, working + directory and the environment remains unchanged. The output of the command + is stored as-is and is expected to be small.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyMapper.class);
    +         job.setReducerClass(MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
    +         JobClient.runJob(job);
    +         return 0;
    +       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
    +         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> +
    +
    + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + +
    + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bloom filter, as defined by Bloom in 1970. +

    + The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + the networking research community in the past decade thanks to the bandwidth efficiencies that it + offers for the transmission of set membership information between networked hosts. A sender encodes + the information into a bit vector, the Bloom filter, that is more compact than a conventional + representation. Computation and space costs for construction are linear in the number of elements. + The receiver uses the filter to test whether various elements are members of the set. Though the + filter will occasionally return a false positive, it will never return a false negative. When creating + the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + +
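    A small usage sketch against the org.apache.hadoop.util.bloom API (the vector size, hash count and key are arbitrary example values, not recommendations):

    import org.apache.hadoop.util.bloom.BloomFilter;
    import org.apache.hadoop.util.bloom.Key;
    import org.apache.hadoop.util.hash.Hash;

    public class BloomExample {
        public static void main(String[] args) {
            // 1024-bit vector, 4 hash functions, Murmur hashing
            BloomFilter filter = new BloomFilter(1024, 4, Hash.MURMUR_HASH);
            filter.add(new Key("block-0042".getBytes()));

            // true means "probably present"; false means "definitely absent"
            boolean maybe = filter.membershipTest(new Key("block-0042".getBytes()));
            System.out.println(maybe);
        }
    }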

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]> + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this counting Bloom filter. +

    + Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + @param key The key to remove.]]> + + + + + + + + + + + + key -> count map. +

    NOTE: due to the bucket size of this filter, inserting the same + key more than 15 times will cause an overflow at all filter positions + associated with this key, and it will significantly increase the error + rate for this and other keys. For this reason the filter can only be + used to store small count values 0 <= N << 15. + @param key key to be tested + @return 0 if the key is not present. Otherwise, a positive value v will + be returned such that v == count with probability equal to the + error rate of this filter, and v > count otherwise. + Additionally, if the filter experienced an underflow as a result of + {@link #delete(Key)} operation, the return value may be lower than the + count with the probability of the false negative rate of such + filter.]]> + + + + + + + + + + + + + + + + + + + + + + counting Bloom filter, as defined by Fan et al. in a ToN + 2000 paper. +

+ A counting Bloom filter is an improvement to a standard Bloom filter as it allows dynamic additions and deletions of set membership information. This is achieved through the use of a counting vector instead of a bit vector.

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Summary cache: a scalable wide-area web cache sharing protocol]]> + + + + + + + + + + + + + + Builds an empty Dynamic Bloom filter. + @param vectorSize The number of bits in the vector. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}). + @param nr The threshold for the maximum number of keys to record in a + dynamic Bloom filter row.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + dynamic Bloom filter, as defined in the INFOCOM 2006 paper. +

    + A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but + each of the s rows is a standard Bloom filter. The creation + process of a DBF is iterative. At the start, the DBF is a 1 * m + bit matrix, i.e., it is composed of a single standard Bloom filter. + It assumes that nr elements are recorded in the + initial bit vector, where nr <= n (n is + the cardinality of the set A to record in the filter). +

    + As the size of A grows during the execution of the application, + several keys must be inserted in the DBF. When inserting a key into the DBF, + one must first get an active Bloom filter in the matrix. A Bloom filter is + active when the number of recorded keys, nr, is + strictly less than the current cardinality of A, n. + If an active Bloom filter is found, the key is inserted and + nr is incremented by one. On the other hand, if there + is no active Bloom filter, a new one is created (i.e., a new row is added to + the matrix) according to the current size of A and the element + is added in this new Bloom filter and the nr value of + this new Bloom filter is set to one. A given key is said to belong to the + DBF if the k positions are set to one in one of the matrix rows. +
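    An illustrative construction of such a filter via org.apache.hadoop.util.bloom.DynamicBloomFilter; the constructor arguments follow the javadoc above (bits per row, hash count, hash type and the per-row threshold nr), and all numbers are arbitrary examples:

    import org.apache.hadoop.util.bloom.DynamicBloomFilter;
    import org.apache.hadoop.util.bloom.Key;
    import org.apache.hadoop.util.hash.Hash;

    public class DynamicBloomExample {
        public static void main(String[] args) {
            DynamicBloomFilter dbf = new DynamicBloomFilter(1024, 4, Hash.MURMUR_HASH, 128);
            for (int i = 0; i < 1000; i++) {
                dbf.add(new Key(("key-" + i).getBytes()));  // new rows are appended as nr is exceeded
            }
            System.out.println(dbf.membershipTest(new Key("key-7".getBytes())));
        }
    }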

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + + @see Theory and Network Applications of Dynamic Bloom Filters]]> + + + + + + + + + Builds a hash function that must obey to a given maximum number of returned values and a highest value. + @param maxValue The maximum highest returned value. + @param nbHash The number of resulting hashed values. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + this hash function. A NOOP]]> + + + + + + + + + + + + + + + + + + + The idea is to randomly select a bit to reset.]]> + + + + + + The idea is to select the bit to reset that will generate the minimum + number of false negative.]]> + + + + + + The idea is to select the bit to reset that will remove the maximum number + of false positive.]]> + + + + + + The idea is to select the bit to reset that will, at the same time, remove + the maximum number of false positve while minimizing the amount of false + negative generated.]]> + + + + + Originally created by + European Commission One-Lab Project 034819.]]> + + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this retouched Bloom filter. +

    + Invariant: if the false positive is null, nothing happens. + @param key The false positive key to add.]]> + + + + + + this retouched Bloom filter. + @param coll The collection of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The list of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The array of false positive.]]> + + + + + + + this retouched Bloom filter. + @param scheme The selective clearing scheme to apply.]]> + + + + + + + + + + + + retouched Bloom filter, as defined in the CoNEXT 2006 paper. +

    + It allows the removal of selected false positives at the cost of introducing + random false negatives, and with the benefit of eliminating some random false + positives at the same time. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + @see RemoveScheme The different selective clearing algorithms + + @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]> + + + + + + + + diff --git a/aarch64/share/hadoop/common/jdiff/hadoop-core_0.22.0.xml b/aarch64/share/hadoop/common/jdiff/hadoop-core_0.22.0.xml new file mode 100644 index 0000000..b3130dc --- /dev/null +++ b/aarch64/share/hadoop/common/jdiff/hadoop-core_0.22.0.xml @@ -0,0 +1,28377 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + UnsupportedOperationException + @param key + @param newKeys + @param customMessage]]> + + + + + + + UnsupportedOperationException + + @param key Key that is to be deprecated + @param newKeys list of keys that take up the values of deprecated key]]> + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param in InputStream to deserialize the object from.]]> + + + + + + + + + + + name property, null if + no such property exists. If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + name property as a trimmed String, + null if no such property exists. + If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + name property, without doing + variable expansion.If the key is + deprecated, it returns the value of the first key which replaces + the deprecated key and is not null. + + @param name the property name. + @return the value of the name property or + its replacing property and null if no such property exists.]]> + + + + + + + value of the name property. If + name is deprecated, it sets the value to the keys + that replace the deprecated key. + + @param name property name. + @param value property value.]]> + + + + + + + + + + + + + + name. If the key is deprecated, + it returns the value of the first key which replaces the deprecated key + and is not null. + If no such property exists, + then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, or if the specified value is not a valid + int, then defaultValue is returned. 
+ + @param name property name. + @param defaultValue default value. + @return property value as an int, + or defaultValue.]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property is specified, or if the specified value is not a valid + long, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property is specified, or if the specified value is not a valid + float, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a float, + or defaultValue.]]> + + + + + + + name property to a float. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + + name property to the given type. This + is equivalent to set(<name>, value.toString()). + @param name property name + @param value new value]]> + + + + + + + + + + + + + + name property as a Pattern. + If no such property is specified, or if the specified value is not a valid + Pattern, then DefaultValue is returned. + + @param name property name + @param defaultValue default value + @return property value as a compiled Pattern, or defaultValue]]> + + + + + + + Pattern. + If the pattern is passed as null, sets the empty pattern which results in + further calls to getPattern(...) returning the default value. + + @param name property name + @param pattern new value]]> + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

    + This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + name property as + a collection of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then empty Collection is returned. + + @param name property name. + @return property value as a collection of Strings, or empty Collection]]> + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then an empty array is returned. + + @param name property name. + @return property value as an array of trimmed Strings, + or empty array.]]> + + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of trimmed Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + name property as a List + of objects implementing the interface specified by xface. + + An exception is thrown if any of the classes does not exist, or if it does + not implement the named interface. + + @param name the property name. + @param xface the interface implemented by the classes named by + name. + @return a List of objects implementing xface.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. 
+ + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + + + with matching keys]]> + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

      +
    1. core-default.xml: Read-only defaults for hadoop.
    2. core-site.xml: Site-specific configuration for a given hadoop installation.
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +
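    As a rough sketch of how these resources are consumed (the resource name my-site.xml and the property my.custom.retries are invented for illustration and are not Hadoop defaults):

        // Minimal sketch: load the default resources, add one more, and read a typed value.
        import org.apache.hadoop.conf.Configuration;

        public class ConfExample {
          public static void main(String[] args) {
            Configuration conf = new Configuration();           // loads core-default.xml, then core-site.xml
            conf.addResource("my-site.xml");                    // hypothetical resource, loaded after the defaults
            int retries = conf.getInt("my.custom.retries", 3);  // hypothetical property; 3 is the fallback
            System.out.println("my.custom.retries = " + retries);
          }
        }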

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    +  <property>
    +    <name>dfs.client.buffer.dir</name>
    +    <value>/tmp/hadoop/dfs/client</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + core-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
    1. Other properties defined in this Configuration; and, if a name is undefined here,
    2. Properties in {@link System#getProperties()}.
    + +

    For example, if a configuration resource contains the following property + definitions: +

    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    + + When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name.]]> +
    +
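    A minimal sketch of the expansion behaviour described above; the property names mirror the XML example, and the class name is invented:

        import org.apache.hadoop.conf.Configuration;

        public class ExpansionExample {
          public static void main(String[] args) {
            Configuration conf = new Configuration();
            conf.set("basedir", "/user/${user.name}");   // ${user.name} is resolved from the System properties
            conf.set("tempdir", "${basedir}/tmp");
            // get() returns the fully expanded value, e.g. "/user/alice/tmp"
            System.out.println(conf.get("tempdir"));
          }
        }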
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + uri has syntax error]]> + + + + + + + + + + uri is + not found]]> + + + + + + + + + + + + + + + + + + + uri + determines a configuration property name, + fs.AbstractFileSystem.scheme.impl whose value names the + AbstractFileSystem class. + + The entire URI and conf is passed to the AbstractFileSystem factory method. + + @param uri for the file system to be created. + @param conf which is passed to the file system impl. + + @return file system for the given URI. + + @throws UnsupportedFileSystemException if the file system for + uri is not supported.]]> + + + + + + + + + + + + default port;]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + core-default.xml]]> + + + + + core-default.xml]]> + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + core-default.xml]]> + + + + + core-default.xml]]> + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + core-default.xml]]> + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + core-default.xml]]> + + + + + 
core-default.xml]]> + + + + + core-default.xml]]> + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND) + + and pass it to {@link org.apache.hadoop.fs.FileSystem #create(Path f, FsPermission permission, + EnumSet flag, int bufferSize, short replication, long blockSize, + Progressable progress)}. + +

    + Combining {@link #OVERWRITE} with either {@link #CREATE} or {@link #APPEND} has the same effect as using {@link #OVERWRITE} alone.
    + Combining {@link #CREATE} with {@link #APPEND} has the following semantics:

      +
    1. create the file if it does not exist;
    2. append to the file if it already exists.
    ]]> +
    +
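    A minimal sketch of combining these flags through the create overload cited above, assuming a configured FileSystem; the path and written bytes are invented:

        import java.util.EnumSet;
        import org.apache.hadoop.conf.Configuration;
        import org.apache.hadoop.fs.CreateFlag;
        import org.apache.hadoop.fs.FSDataOutputStream;
        import org.apache.hadoop.fs.FileSystem;
        import org.apache.hadoop.fs.Path;
        import org.apache.hadoop.fs.permission.FsPermission;

        public class CreateFlagExample {
          public static void main(String[] args) throws Exception {
            FileSystem fs = FileSystem.get(new Configuration());
            Path p = new Path("/tmp/flag-demo.txt");             // hypothetical path
            EnumSet<CreateFlag> flags = EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND);
            // Create the file if it is missing, otherwise append to it.
            FSDataOutputStream out = fs.create(p, FsPermission.getDefault(), flags,
                4096, fs.getDefaultReplication(p), fs.getDefaultBlockSize(p), null);
            out.writeBytes("hello\n");
            out.close();
          }
        }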
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + defaultFsUri is not supported]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NewWdir can be one of: +
      +
    • relative path: "foo/bar"
    • absolute without scheme: "/foo/bar"
    • fully qualified with scheme: "xx://auth/foo/bar"

    + Illegal WDs:
    • relative with scheme: "xx:foo/bar"
    • non existent directory
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Progress - to report progress on the operation - default null
  • Permission - umask is applied against permission: default is FsPermission.getDefault()
  • CreateParent - create missing parent path; default is not to create parents
  • The defaults for the following are SS defaults of the file server implementing the target path. Not all parameters make sense for all kinds of file system - e.g. localFS ignores Blocksize, replication, checksum
      +
    • BufferSize - buffersize used in FSDataOutputStream +
    • Blocksize - block size for file blocks +
    • ReplicationFactor - replication for blocks +
    • BytesPerChecksum - bytes per checksum +
    + + + @return {@link FSDataOutputStream} for created file + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file f already exists + @throws FileNotFoundException If parent of f does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of f is not a + directory. + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is not valid]]> + + + + + + + + + + + + + + dir
    already + exists + @throws FileNotFoundException If parent of dir does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of dir is not a + directory + @throws UnsupportedFileSystemException If file system for dir + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path dir is not valid]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is invalid]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory. +
  • Fails if src is a directory and dst is a file. +
  • Fails if the parent of dst does not exist or is a file. + +

    + If OVERWRITE option is not passed as an argument, rename fails if the dst + already exists. +

    + If OVERWRITE option is passed as an argument, rename overwrites the dst if + it is a file or an empty directory. Rename fails if dst is a non-empty + directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for details +

    + + @param src path to be renamed + @param dst new path after rename + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If dst already exists and + options has {@link Rename#OVERWRITE} option + false. + @throws FileNotFoundException If src does not exist + @throws ParentNotDirectoryException If parent of dst is not a + directory + @throws UnsupportedFileSystemException If file system for src + and dst is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws HadoopIllegalArgumentException If username or + groupname is invalid.]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + 
@throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If the given path does not refer to a symlink + or an I/O error occurred]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is invalid]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + Given a path referring to a symlink of form: + + <---X---> + fs://host/A/B/link + <-----Y-----> + + In this path X is the scheme and authority that identify the file system, + and Y is the path leading up to the final path component "link". If Y is + a symlink itself then let Y' be the target of Y and X' be the scheme and + authority of Y'. Symlink targets may: + + 1. Fully qualified URIs + + fs://hostX/A/B/file Resolved according to the target file system. + + 2. Partially qualified URIs (eg scheme but no host) + + fs:///A/B/file Resolved according to the target file sytem. Eg resolving + a symlink to hdfs:///A results in an exception because + HDFS URIs must be fully qualified, while a symlink to + file:///A will not since Hadoop's local file systems + require partially qualified URIs. + + 3. Relative paths + + path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path + is "../B/file" then [Y'][path] is hdfs://host/B/file + + 4. Absolute paths + + path Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path + is "/file" then [X][path] is hdfs://host/file + + + @param target the target of the symbolic link + @param link the path to be created that points to target + @param createParent if true then missing parent dirs are created if + false then parent must exist + + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file linkcode> already exists + @throws FileNotFoundException If target does not exist + @throws ParentNotDirectoryException If parent of link is not a + directory. 
+ @throws UnsupportedFileSystemException If file system for + target or link is not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + + + + + + *** Path Names *** +

    + + The Hadoop file system supports a URI name space and URI names. + It offers a forest of file systems that can be referenced using fully + qualified URIs. + Two common Hadoop file systems implementations are +

      +
    • the local file system: file:///path +
    • the hdfs file system hdfs://nnAddress:nnPort/path +
    + While URI names are very flexible, they require knowing the name or address of the server. For convenience one often wants to access the default file system in one's environment without knowing its name/address. This has an additional benefit: it allows one to change one's default fs (e.g. an admin moves an application from cluster1 to cluster2). +

    + + To facilitate this, Hadoop supports a notion of a default file system. + The user can set his default file system, although this is + typically set up for you in your environment via your default config. + A default file system implies a default scheme and authority; slash-relative + names (such as /for/bar) are resolved relative to that default FS. + Similarly a user can also have working-directory-relative names (i.e. names + not starting with a slash). While the working directory is generally in the + same default FS, the wd can be in a different FS. +

    + Hence Hadoop path names can be one of: +

      +
    • fully qualified URI: scheme://authority/path +
    • slash relative names: /path relative to the default file system +
    • wd-relative names: path relative to the working dir +
    + Relative paths with scheme (scheme:foo/bar) are illegal. + +

    + ****The Role of the FileContext and configuration defaults**** +

    + The FileContext provides the file namespace context for resolving file names; it also contains the umask for permissions. In that sense it is like the per-process file-related state in a Unix system. These two properties +

      +
    • default file system (i.e. your slash) +
    • umask +
    + are, in general, obtained from the default configuration file in your environment (@see {@link Configuration}). + + No other configuration parameters are obtained from the default config as far as the file context layer is concerned. All file system instances (i.e. deployments of file systems) have default properties; we call these server-side (SS) defaults. Operations like create allow one to select many properties: either pass them in as explicit parameters or use the SS properties. +

    + The file system related SS defaults are +

      +
    • the home directory (default is "/user/userName") +
    • the initial wd (only for local fs) +
    • replication factor +
    • block size +
    • buffer size +
    • bytesPerChecksum (if used). +
    + +

    + *** Usage Model for the FileContext class *** +

    + Example 1: use the default config read from the $HADOOP_CONFIG/core.xml. + Unspecified values come from core-defaults.xml in the release jar. +

      +
    • myFContext = FileContext.getFileContext(); // uses the default config + // which has your default FS +
    • myFContext.create(path, ...); +
    • myFContext.setWorkingDir(path) +
    • myFContext.open (path, ...); +
    + Example 2: Get a FileContext with a specific URI as the default FS +
      +
    • myFContext = FileContext.getFileContext(URI) +
    • myFContext.create(path, ...); + ... +
    + Example 3: FileContext with local file system as the default +
      +
    • myFContext = FileContext.getLocalFSFileContext() +
    • myFContext.create(path, ...); +
    • ... +
    + Example 4: Use a specific config, ignoring $HADOOP_CONFIG. Generally you should not need to use a config unless you are doing something like the following: +
      +
    • configX = someConfigSomeOnePassedToYou. +
    • myFContext = getFileContext(configX); // configX is not changed, + // is passed down +
    • myFContext.create(path, ...); +
    • ... +
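    A minimal compilable sketch along the lines of Examples 1-3 (the working directory and file name are invented):

        import java.util.EnumSet;
        import org.apache.hadoop.fs.CreateFlag;
        import org.apache.hadoop.fs.FSDataOutputStream;
        import org.apache.hadoop.fs.FileContext;
        import org.apache.hadoop.fs.Options.CreateOpts;
        import org.apache.hadoop.fs.Path;

        public class FileContextExample {
          public static void main(String[] args) throws Exception {
            FileContext fc = FileContext.getLocalFSFileContext();   // Example 3: local fs as the default
            fc.setWorkingDirectory(new Path("/tmp"));               // hypothetical working directory
            FSDataOutputStream out = fc.create(new Path("demo.txt"),           // wd-relative name
                EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
                CreateOpts.createParent());
            out.writeBytes("hello\n");
            out.close();
          }
        }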
    ]]> + + + + + + + + + + + + + + path could + not be resolved + @throws IOException an I/O error occured]]> + + + + + + + + + + + + + + + + + + + + + + + + + + f is + not supported + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for + f is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for + pathPattern is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + files does not + exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    ?             Matches any single character.
    *             Matches zero or more characters.
    [abc]         Matches a single character from character set {a,b,c}.
    [a-b]         Matches a single character from the character range {a...b}. Note: character a must be lexicographically less than or equal to character b.
    [^a]          Matches a single char that is not from character set or range {a}. Note that the ^ character must occur immediately to the right of the opening bracket.
    \c            Removes (escapes) any special meaning of character c.
    {ab,cd}       Matches a string from the string set {ab, cd}
    {ab,c{de,fh}} Matches a string from string set {ab, cde, cfh}
    + + @param pathPattern a regular expression specifying a path pattern + + @return an array of paths that match the path pattern + + @throws AccessControlException If access is denied + @throws UnsupportedFileSystemException If file system for + pathPattern is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> +
    +
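    A minimal sketch of applying such a pattern through FileContext.util().globStatus(); the /logs layout is invented:

        import org.apache.hadoop.fs.FileContext;
        import org.apache.hadoop.fs.FileStatus;
        import org.apache.hadoop.fs.Path;

        public class GlobExample {
          public static void main(String[] args) throws Exception {
            FileContext fc = FileContext.getFileContext();
            // Matches e.g. /logs/2014-01-03/part-0001 and /logs/2014-02-17/part-0002
            FileStatus[] matches = fc.util().globStatus(new Path("/logs/2014-0[1-3]-*/part-*"));
            if (matches != null) {
              for (FileStatus st : matches) {
                System.out.println(st.getPath());
              }
            }
          }
        }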
    + + + + + + + + pathPattern is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + dst already exists + @throws FileNotFoundException If src does not exist + @throws ParentNotDirectoryException If parent of dst is not + a directory + @throws UnsupportedFileSystemException If file system for + src or dst is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path dst is invalid]]> + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method.]]> + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method. + This always returns a new FileSystem object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory. +
  • Fails if src is a directory and dst is a file. +
  • Fails if the parent of dst does not exist or is a file. + +

    + If OVERWRITE option is not passed as an argument, rename fails + if the dst already exists. +

    + If OVERWRITE option is passed as an argument, rename overwrites + the dst if it is a file or an empty directory. Rename fails if dst is + a non-empty directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for + details. This default implementation is non atomic. +

    + This method is deprecated since it is a temporary method added to + support the transition from FileSystem to FileContext for user + applications. + + @param src path to be renamed + @param dst new path after rename + @throws IOException on failure]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    ?             Matches any single character.
    *             Matches zero or more characters.
    [abc]         Matches a single character from character set {a,b,c}.
    [a-b]         Matches a single character from the character range {a...b}. Note that character a must be lexicographically less than or equal to character b.
    [^a]          Matches a single character that is not from character set or range {a}. Note that the ^ character must occur immediately to the right of the opening bracket.
    \c            Removes (escapes) any special meaning of character c.
    {ab,cd}       Matches a string from the string set {ab, cd}
    {ab,c{de,fh}} Matches a string from the string set {ab, cde, cfh}
    + + @param pathPattern a regular expression specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException]]> +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + path is invalid]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the iteration has more elements. + + @return true if the iterator has more elements. 
+ @throws IOException if any IO error occurs]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trash feature. Files are moved to a user's trash + directory, a subdirectory of their home directory named ".Trash". Files are + initially moved to a current sub-directory of the trash directory. + Within that sub-directory their original path is preserved. Periodically + one may checkpoint the current trash and remove older checkpoints. (This + design permits trash management without enumeration of the full trash + content, without date support in the filesystem, and without clock + synchronization.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A client for the Kosmos filesystem (KFS) + +

    Introduction

    + This page describes how to use the Kosmos Filesystem (KFS) as a backing store with Hadoop. It assumes that you have downloaded the KFS software and installed the necessary binaries as outlined in the KFS documentation. + +

    Steps

    + +
      +
    • In the Hadoop conf directory edit core-site.xml, + add the following: +
      +<property>
      +  <name>fs.kfs.impl</name>
      +  <value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
      +  <description>The FileSystem for kfs: uris.</description>
      +</property>
      +            
      + +
    • In the Hadoop conf directory edit core-site.xml, + adding the following (with appropriate values for + <server> and <port>): +
      +<property>
      +  <name>fs.default.name</name>
      +  <value>kfs://<server:port></value> 
      +</property>
      +
      +<property>
      +  <name>fs.kfs.metaServerHost</name>
      +  <value><server></value>
      +  <description>The location of the KFS meta server.</description>
      +</property>
      +
      +<property>
      +  <name>fs.kfs.metaServerPort</name>
      +  <value><port></value>
      +  <description>The location of the meta server's port.</description>
      +</property>
      +
      +
      +
    • Copy KFS's kfs-0.1.jar to Hadoop's lib directory. This step enables Hadoop to load the KFS-specific modules. Note that kfs-0.1.jar was built when you compiled the KFS source code. This jar file contains code that calls KFS's client library code via JNI; the native code is in KFS's libkfsClient.so library.
    • When the Hadoop map/reduce trackers start up, those +processes (on local as well as remote nodes) will now need to load +KFS's libkfsClient.so library. To simplify this process, it is advisable to +store libkfsClient.so in an NFS accessible directory (similar to where +Hadoop binaries/scripts are stored); then, modify Hadoop's +conf/hadoop-env.sh adding the following line and providing suitable +value for <path>: +
      +export LD_LIBRARY_PATH=<path>
      +
      + + +
    • Start only the map/reduce trackers +
      + example: execute Hadoop's bin/start-mapred.sh
    • +
    +
    + +If the map/reduce job trackers start up, all file-I/O is done to KFS.]]> +
    +
    + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + mode is invalid]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a tool for migrating data from an older to a newer version + of an S3 filesystem. +

    +

    + All files in the filesystem are migrated by re-writing the block metadata + - no datafiles are touched. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A block-based {@link FileSystem} backed by + Amazon S3. +

    + @see NativeS3FileSystem]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + A distributed, block-based implementation of {@link +org.apache.hadoop.fs.FileSystem} that uses Amazon S3 +as a backing store.

    + +

    +Files are stored in S3 as blocks (represented by +{@link org.apache.hadoop.fs.s3.Block}), which have an ID and a length. +Block metadata is stored in S3 as a small record (represented by +{@link org.apache.hadoop.fs.s3.INode}) using the URL-encoded +path string as a key. Inodes record the file type (regular file or directory) and the list of blocks. +This design makes it easy to seek to any given position in a file by reading the inode data to compute +which block to access, then using S3's support for +HTTP Range headers +to start streaming from the correct position. +Renames are also efficient since only the inode is moved (by a DELETE followed by a PUT since +S3 does not support renames). +

    +

    +For a single file /dir1/file1 which takes two blocks of storage, the file structure in S3 +would be something like this: +

    +
    +/
    +/dir1
    +/dir1/file1
    +block-6415776850131549260
    +block-3026438247347758425
    +
    +

    +Inodes start with a leading /, while blocks are prefixed with block-. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If f is a file, this method will make a single call to S3. + If f is a directory, this method will make a maximum of + (n / 1000) + 2 calls to S3, where n is the total number of + files and directories contained directly in f. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} for reading and writing files stored on + Amazon S3. + Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation + stores files on S3 in their + native form so they can be read by other S3 tools. + + A note about directories. S3 of course has no "native" support for them. + The idiom we choose then is: for any directory created by this class, + we use an empty object "#{dirpath}_$folder$" as a marker. + Further, to interoperate with other S3 tools, we also accept the following: + - an object "#{dirpath}/' denoting a directory marker + - if there exists any objects with the prefix "#{dirpath}/", then the + directory is said to exist + - if both a file with the name of a directory and a marker for that + directory exists, then the *file masks the directory*, and the directory + is never returned. +

    + @see org.apache.hadoop.fs.s3.S3FileSystem]]> +
    +
    + + + +A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem} for reading and writing files on +Amazon S3. +Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem}, which is block-based, +this implementation stores +files on S3 in their native form for interoperability with other S3 tools. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nth value.]]> + + + + + + + + + + + + + + + + + + + + + nth value in the file.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value argument is null or + its size is zero, the elementType argument must not be null. If + the argument value's size is bigger than zero, the argument + elementType is not be used. + + @param value + @param elementType]]> + + + + + value should not be null + or empty. + + @param value]]> + + + + + + + + + + + + + + value and elementType. If the value argument + is null or its size is zero, the elementType argument must not be + null. 
If the argument value's size is bigger than zero, the + argument elementType is not be used. + + @param value + @param elementType]]> + + + + + + + + + + + + + + + + + + + + + + + + + o is an EnumSetWritable with the same value, + or both are null.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

    + Compared with ObjectWritable, this class is much more efficient, because ObjectWritable will append the class declaration as a String into the output file in every Key-Value pair. +

    + +

    + GenericWritable implements the {@link Configurable} interface, so that it will be configured by the framework. The configuration is passed to the wrapped objects implementing the {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
    + 2. Implement the abstract method getTypes() to define the classes which will be wrapped in GenericObject in the application. Attention: the classes defined in the getTypes() method must implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link IOException} or + null pointers. Must only be used for cleanup in exception handlers. + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +
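    A minimal, self-contained variant of the sketch above that wraps two concrete Writable types; the class name DemoGenericWritable is invented:

        import org.apache.hadoop.io.GenericWritable;
        import org.apache.hadoop.io.IntWritable;
        import org.apache.hadoop.io.Text;
        import org.apache.hadoop.io.Writable;

        public class DemoGenericWritable extends GenericWritable {
          @SuppressWarnings("unchecked")
          private static final Class<? extends Writable>[] CLASSES =
              (Class<? extends Writable>[]) new Class[] { IntWritable.class, Text.class };

          @Override
          protected Class<? extends Writable>[] getTypes() {
            return CLASSES;
          }

          public static void main(String[] args) {
            DemoGenericWritable wrapper = new DemoGenericWritable();
            wrapper.set(new IntWritable(42));                 // wrap one of the registered types
            IntWritable value = (IntWritable) wrapper.get();  // unwrap it again
            System.out.println(value.get());
          }
        }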

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key and + val. Returns true if such a pair exists and false when at + the end of the map]]> + + + + + + + + + + + + + + + + key or if it does not exist, at the first entry + after the named key. + +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof.]]> + + + + + + + + + key does not exist, return + the first entry that falls just before the key. Otherwise, + return the record that sorts just after. + @return - the key that was the closest match or null if eof.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + className by first finding + it in the specified conf. If the specified conf is null, + try load it directly.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + An example of such an attack is: +
      +
    1. Malicious user removes his task's syslog file, and puts a link to the jobToken file of a target user.
    2. Malicious user tries to open the syslog file via the servlet on the tasktracker.
    3. The tasktracker is unaware of the symlink, and simply streams the contents of the jobToken file. The malicious user can now access potentially sensitive map outputs, etc. of the target user's job.
    + A similar attack is possible involving task log truncation, but in that case + due to an insecure write to a file. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link Writer}, {@link Reader} and + {@link Sorter} classes for writing, reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
1. Writer : Uncompressed records.
2. RecordCompressWriter : Record-compressed files, only compress + values.
3. BlockCompressWriter : Block-compressed files, both keys & + values are collected in 'blocks' + separately and compressed. The size of + the 'block' is configurable.
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

The recommended way is to use the static createWriter methods + provided by the SequenceFile to choose the preferred format; a brief + usage sketch appears after the format description below.

    + +

    The {@link Reader} acts as the bridge and can read any of the above + SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
• version - 3 bytes of magic header SEQ, followed by 1 byte of actual + version number (e.g. SEQ4 or SEQ6)
• keyClassName - key class
• valueClassName - value class
• compression - A boolean which specifies if compression is turned on for + keys/values in this file.
• blockCompression - A boolean which specifies if block-compression is + turned on for keys/values in this file.
• compression codec - CompressionCodec class which is used for + compression of keys and/or values (if compression is + enabled).
• metadata - {@link Metadata} for this file.
• sync - A sync marker to denote end of the header.
    + +
    Uncompressed SequenceFile Format
    +
      +
• Header
• Record
  • Record length
  • Key length
  • Key
  • Value
• A sync-marker every few 100 bytes or so.
    + +
    Record-Compressed SequenceFile Format
    +
      +
• Header
• Record
  • Record length
  • Key length
  • Key
  • Compressed Value
• A sync-marker every few 100 bytes or so.
    + +
    Block-Compressed SequenceFile Format
    +
      +
• Header
• Record Block
  • Compressed key-lengths block-size
  • Compressed key-lengths block
  • Compressed keys block-size
  • Compressed keys block
  • Compressed value-lengths block-size
  • Compressed value-lengths block
  • Compressed values block-size
  • Compressed values block
• A sync-marker every few 100 bytes or so.
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
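+As promised above, a minimal, hedged sketch of the createWriter/Reader usage
+follows; the /tmp/demo.seq path and the Text/IntWritable key and value types
+are illustrative assumptions, and any of the three formats described above can
+be selected by passing a compression option to createWriter.
+
+     import org.apache.hadoop.conf.Configuration;
+     import org.apache.hadoop.fs.Path;
+     import org.apache.hadoop.io.IOUtils;
+     import org.apache.hadoop.io.IntWritable;
+     import org.apache.hadoop.io.SequenceFile;
+     import org.apache.hadoop.io.Text;
+
+     public class SequenceFileDemo {
+       public static void main(String[] args) throws Exception {
+         Configuration conf = new Configuration();
+         Path path = new Path("/tmp/demo.seq");   // hypothetical path
+
+         // Write a few key/value pairs via the static createWriter factory.
+         SequenceFile.Writer writer = SequenceFile.createWriter(conf,
+             SequenceFile.Writer.file(path),
+             SequenceFile.Writer.keyClass(Text.class),
+             SequenceFile.Writer.valueClass(IntWritable.class));
+         try {
+           for (int i = 0; i < 10; i++) {
+             writer.append(new Text("key-" + i), new IntWritable(i));
+           }
+         } finally {
+           IOUtils.closeStream(writer);
+         }
+
+         // The Reader bridges all three on-disk formats transparently.
+         SequenceFile.Reader reader =
+             new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
+         try {
+           Text key = new Text();
+           IntWritable val = new IntWritable();
+           while (reader.next(key, val)) {
+             System.out.println(key + " => " + val);
+           }
+         } finally {
+           IOUtils.closeStream(reader);
+         }
+       }
+     }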
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = 0. Otherwise, + the length is not available. + @return The opened stream. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key, skipping its + value. True if another entry exists, and false at end of file.]]> + + + + + + + + key and + val. Returns true if such a pair exists and false when at + end of file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The position passed must be a position returned by {@link + SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + position, use {@link SequenceFile.Reader#sync(long)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SegmentDescriptor + @param segments the list of SegmentDescriptors + @param tmpDir the directory to write temporary files into + @return RawKeyValueIterator + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For best performance, applications should make sure that the {@link + Writable#readFields(DataInput)} implementation of their keys is + very efficient. In particular, it should avoid allocating memory.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This always returns a synchronized position. In other words, + immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + the key may be earlier in the file than key last written when this + method was called (e.g., with block-compression, it may be the first key + in the block that was being written when this method was called).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key. Returns + true if such a key exists and false when at the end of the set.]]> + + + + + + + key. + Returns key, or null if no match exists.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. 
Note that this + method avoids using the converter or doing String instatiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. + @return byte position of the first occurence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

+ + @param in DataInput to deserialize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +

    +     public class MyWritable implements Writable {
    +       // Some data     
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 

    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Example:

    +

    +     public class MyWritableComparable implements
    +         WritableComparable<MyWritableComparable> {
    +
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable other) {
    +         int thisValue = this.counter;
    +         int thatValue = other.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
    +       }
    +     }
    + 

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implemenation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @param conf the Configuration object which contains confs for creating or reinit the compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + (Both native and non-native versions of various Decompressors require + that the data passed in via b[] remain unmodified until + the caller is explicitly notified--via {@link #needsInput()}--that the + buffer may be safely modified. With this requirement, an extra + buffer-copy can be avoided.) 
+ + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called in + order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. + @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the decompressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • "none" - No compression. +
  • "lzo" - LZO compression. +
  • "gz" - GZIP compression. + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Block Compression. +
  • Named meta data blocks. +
  • Sorted or unsorted keys. +
  • Seek by key or by file offset. + + The memory footprint of a TFile includes the following: +
      +
    • Some constant overhead of reading or writing a compressed block. +
        +
      • Each compressed block requires one compression/decompression codec for + I/O. +
      • Temporary space to buffer the key. +
      • Temporary space to buffer the value (for TFile.Writer only). Values are + chunk encoded, so that we buffer at most one chunk of user data. By default, + the chunk buffer is 1MB. Reading chunked value does not require additional + memory. +
      +
    • TFile index, which is proportional to the total number of Data Blocks. + The total amount of memory needed to hold the index can be estimated as + (56+AvgKeySize)*NumBlocks. +
    • MetaBlock index, which is proportional to the total number of Meta + Blocks.The total amount of memory needed to hold the index for Meta Blocks + can be estimated as (40+AvgMetaBlockName)*NumMetaBlock. +
    +

    + The behavior of TFile can be customized by the following variables through + Configuration: +

      +
• tfile.io.chunk.size: Value chunk size. Integer (in bytes). Defaults + to 1MB. Values shorter than the chunk size are guaranteed to have a + known value length at read time (see + {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}).
• tfile.fs.output.buffer.size: Buffer size used for + FSDataOutputStream. Integer (in bytes). Defaults to 256KB.
• tfile.fs.input.buffer.size: Buffer size used for + FSDataInputStream. Integer (in bytes). Defaults to 256KB.
    +
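+A minimal sketch of setting the three knobs just listed on a Configuration
+before constructing a TFile reader or writer; the values shown simply restate
+the documented defaults and are not a tuning recommendation.
+
+     import org.apache.hadoop.conf.Configuration;
+
+     public class TFileTuning {
+       public static Configuration tunedConf() {
+         Configuration conf = new Configuration();
+         conf.setInt("tfile.io.chunk.size", 1024 * 1024);        // value chunk size
+         conf.setInt("tfile.fs.output.buffer.size", 256 * 1024); // FSDataOutputStream buffer
+         conf.setInt("tfile.fs.input.buffer.size", 256 * 1024);  // FSDataInputStream buffer
+         return conf;
+       }
+     }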

    + Suggestions on performance optimization. +

      +
• Minimum block size. We recommend a setting of minimum block size between + 256KB and 1MB for general usage. Larger block size is preferred if files are + primarily for sequential access. However, it would lead to inefficient random + access (because there is more data to decompress). Smaller blocks are good + for random access, but require more memory to hold the block index, and may + be slower to create (because we must flush the compressor stream at the + conclusion of each data block, which leads to an FS I/O flush). Further, due + to the internal caching in Compression codec, the smallest possible block + size would be around 20KB-30KB.
• The current implementation does not offer true multi-threading for + reading. The implementation uses FSDataInputStream seek()+read(), which is + shown to be much faster than the positioned-read call in single thread mode. + However, it also means that if multiple threads attempt to access the same + TFile (using multiple scanners) simultaneously, the actual I/O is carried out + sequentially even if they access different DFS blocks.
• Compression codec. Use "none" if the data is not very compressible (by + compressible, we mean a compression ratio of at least 2:1). Generally, use "lzo" + as the starting point for experimenting. "gz" offers a slightly better + compression ratio than "lzo", but requires 4x the CPU to compress and 2x the CPU to + decompress, compared to "lzo".
• File system buffering. If the underlying FSDataInputStream and + FSDataOutputStream are already adequately buffered, or if applications + read/write keys and values in large buffers, the input/output buffer sizes in the + TFile layer can be reduced by setting the configuration parameters + "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size".
    + + Some design rationale behind TFile can be found at Hadoop-3315.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + entry of the TFile. + @param endKey + End key of the scan. If null, scan up to the last entry + of the TFile. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Use {@link Scanner#atEnd()} to test whether the cursor is at the end + location of the scanner. +

    + Use {@link Scanner#advance()} to move the cursor to the next key-value + pair (or end if none exists). Use seekTo methods ( + {@link Scanner#seekTo(byte[])} or + {@link Scanner#seekTo(byte[], int, int)}) to seek to any arbitrary + location in the covered range (including backward seeking). Use + {@link Scanner#rewind()} to seek back to the beginning of the scanner. + Use {@link Scanner#seekToEnd()} to seek to the end of the scanner. +

    + Actual keys and values may be obtained through {@link Scanner.Entry} + object, which is obtained through {@link Scanner#entry()}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

• Algorithmic comparator: binary comparators that are language + independent. Currently, only "memcmp" is supported.
• Language-specific comparator: binary comparators that can + only be constructed in a specific language. For Java, the syntax + is "jclass:", followed by the class name of the RawComparator. + Currently, we only support RawComparators that can be + constructed through the default constructor (with no + parameters). Parameterized RawComparators such as + {@link WritableComparator} or + {@link JavaSerializationComparator} may not be directly used. + One should write a wrapper class that inherits from such classes + and use its default constructor to perform proper + initialization. + + @param conf + The configuration object. + @throws IOException]]> +
  • if n in [-32, 127): encode in one byte with the actual value. + Otherwise, +
  • if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52; + byte[1]=n&0xff. Otherwise, +
  • if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 - + 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise, +
  • if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112; + byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise: +
  • if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] = + (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff; +
  • if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] = + (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff; + byte[4]=(n>>8)&0xff; byte[5]=n&0xff +
  • if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] = + (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff; + byte[4]=(n>>16)&0xff; byte[5]=(n>>8)&0xff; byte[6]=n&0xff; +
  • if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] = + (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff; + byte[4]=(n>>24)&0xff; byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff; + byte[7]=n&0xff; +
  • if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] = + (n>>54)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff; + byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; + byte[7]=(n>>8)&0xff; byte[8]=n&0xff; + + + @param out + output stream + @param n + the integer number + @throws IOException]]> + + + + + + + (int)Utils#readVLong(in). + + @param in + input stream + @return the decoded integer + @throws IOException + + @see Utils#readVLong(DataInput)]]> + + + + + + + +
  • if (FB >= -32), return (long)FB; +
  • if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff; +
  • if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 + + NB[1]&0xff; +
  • if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 + + (NB[1]&0xff)<<8 + NB[2]&0xff; +
  • if (FB in [-128, -121]), return interpret NB[FB+129] as a signed + big-endian integer. + + @param in + input stream + @return the decoded long integer. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + errno result codes.]]> + + + + + + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + +This package provides a mechanism for using different serialization frameworks +in Hadoop. The property "io.serializations" defines a list of +{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create +{@link org.apache.hadoop.io.serializer.Serializer}s and +{@link org.apache.hadoop.io.serializer.Deserializer}s. +

    + +

    +To add a new serialization framework write an implementation of +{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the +"io.serializations" property. +

    ]]> +
    +
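+A minimal sketch of registering an additional serialization via the
+"io.serializations" property; com.example.MySerialization is a hypothetical
+implementation that would have to exist on the classpath.
+
+     import org.apache.hadoop.conf.Configuration;
+     import org.apache.hadoop.io.serializer.SerializationFactory;
+
+     public class RegisterSerialization {
+       public static void main(String[] args) {
+         Configuration conf = new Configuration();
+         // Keep the stock Writable serialization and append a custom one.
+         conf.setStrings("io.serializations",
+             "org.apache.hadoop.io.serializer.WritableSerialization",
+             "com.example.MySerialization");   // hypothetical implementation
+         SerializationFactory factory = new SerializationFactory(conf);
+         System.out.println("io.serializations = " + conf.get("io.serializations"));
+       }
+     }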
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + avro.reflect.pkgs or implement + {@link AvroReflectSerializable} interface.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +This package provides Avro serialization in Hadoop. This can be used to +serialize/deserialize Avro types in Hadoop. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for +serialization of classes generated by Avro's 'specific' compiler. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for +other classes. +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} work for +any class which is either in the package list configured via +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES} +or implement {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable} +interface. +

    ]]> +
    +
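+A minimal sketch of pointing AvroReflectSerialization at a package via the
+"avro.reflect.pkgs" property; the com.example.model package name is an
+assumption for illustration.
+
+     import org.apache.hadoop.conf.Configuration;
+
+     public class AvroReflectConfig {
+       public static void main(String[] args) {
+         Configuration conf = new Configuration();
+         // Classes under this package become eligible for AvroReflectSerialization.
+         conf.set("avro.reflect.pkgs", "com.example.model");
+         System.out.println(conf.get("avro.reflect.pkgs"));
+       }
+     }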
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +The API is abstract so that it can be implemented on top of +a variety of metrics client libraries. The choice of +client library is a configuration option, and different +modules within the same application can use +different metrics implementation libraries. +

    +Sub-packages: +

    +
    org.apache.hadoop.metrics.spi
    +
    The abstract Server Provider Interface package. Those wishing to + integrate the metrics API with a particular metrics client library should + extend this package.
    + +
    org.apache.hadoop.metrics.file
    +
    An implementation package which writes the metric data to + a file, or sends it to the standard output stream.
    + +
    org.apache.hadoop.metrics.ganglia
    +
    An implementation package which sends metric data to + Ganglia.
    +
    + +

    Introduction to the Metrics API

    + +Here is a simple example of how to use this package to report a single +metric value: +
    +    private ContextFactory contextFactory = ContextFactory.getFactory();
    +    
    +    void reportMyMetric(float myMetric) {
    +        MetricsContext myContext = contextFactory.getContext("myContext");
    +        MetricsRecord myRecord = myContext.getRecord("myRecord");
    +        myRecord.setMetric("myMetric", myMetric);
    +        myRecord.update();
    +    }
    +
    + +In this example there are three names: +
    +
    myContext
    +
    The context name will typically identify either the application, or else a + module within an application or library.
    + +
    myRecord
    +
    The record name generally identifies some entity for which a set of + metrics are to be reported. For example, you could have a record named + "cacheStats" for reporting a number of statistics relating to the usage of + some cache in your application.
    + +
    myMetric
    +
    This identifies a particular metric. For example, you might have metrics + named "cache_hits" and "cache_misses". +
    +
    + +

    Tags

    + +In some cases it is useful to have multiple records with the same name. For +example, suppose that you want to report statistics about each disk on a computer. +In this case, the record name would be something like "diskStats", but you also +need to identify the disk which is done by adding a tag to the record. +The code could look something like this: +
    +    private MetricsRecord diskStats =
    +            contextFactory.getContext("myContext").getRecord("diskStats");
    +            
    +    void reportDiskMetrics(String diskName, float diskBusy, float diskUsed) {
    +        diskStats.setTag("diskName", diskName);
    +        diskStats.setMetric("diskBusy", diskBusy);
    +        diskStats.setMetric("diskUsed", diskUsed);
    +        diskStats.update();
    +    }
    +
    + +

    Buffering and Callbacks

    + +Data is not sent immediately to the metrics system when +MetricsRecord.update() is called. Instead it is stored in an +internal table, and the contents of the table are sent periodically. +This can be important for two reasons: +
      +
1. It means that a programmer is free to put calls to this API in an + inner loop, since updates can be very frequent without slowing down + the application significantly.
2. Some implementations can gain efficiency by combining many metrics + into a single UDP message.
    + +The API provides a timer-based callback via the +registerUpdater() method. The benefit of this +versus using java.util.Timer is that the callbacks will be done +immediately before sending the data, making the data as current as possible. + +
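+A minimal, hedged sketch of such a callback; sampleCurrentValue() is a
+stand-in for whatever the application actually measures.
+
+     import org.apache.hadoop.metrics.ContextFactory;
+     import org.apache.hadoop.metrics.MetricsContext;
+     import org.apache.hadoop.metrics.MetricsRecord;
+     import org.apache.hadoop.metrics.Updater;
+
+     public class MyMetricsUpdater {
+       // Stand-in for the application's own measurement logic.
+       private float sampleCurrentValue() { return 42.0f; }
+
+       public void install() throws Exception {
+         MetricsContext ctx = ContextFactory.getFactory().getContext("myContext");
+         final MetricsRecord rec = ctx.getRecord("myRecord");
+         ctx.registerUpdater(new Updater() {
+           public void doUpdates(MetricsContext unused) {
+             // Invoked by the context immediately before each periodic send.
+             rec.setMetric("myMetric", sampleCurrentValue());
+             rec.update();
+           }
+         });
+       }
+     }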

    Configuration

    + +It is possible to programmatically examine and modify configuration data +before creating a context, like this: +
    +    ContextFactory factory = ContextFactory.getFactory();
    +    ... examine and/or modify factory attributes ...
    +    MetricsContext context = factory.getContext("myContext");
    +
+The factory attributes can be examined and modified using the following +ContextFactory methods: +
      +
• Object getAttribute(String attributeName)
• String[] getAttributeNames()
• void setAttribute(String name, Object value)
• void removeAttribute(attributeName)
    + +

    +ContextFactory.getFactory() initializes the factory attributes by +reading the properties file hadoop-metrics.properties if it exists +on the class path. + +

    +A factory attribute named: +

    +contextName.class
    +
+should have as its value the fully qualified name of the class to be +instantiated by a call of the ContextFactory method +getContext(contextName). If this factory attribute is not +specified, the default is to instantiate +org.apache.hadoop.metrics.file.FileContext. + +

    +Other factory attributes are specific to a particular implementation of this +API and are documented elsewhere. For example, configuration attributes for +the file and Ganglia implementations can be found in the javadoc for +their respective packages.]]> + + + + + + + + + + + + + + + + + + + + + + + fileName attribute, + if specified. Otherwise the data will be written to standard + output.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is configured by setting ContextFactory attributes which in turn + are usually configured through a properties file. All the attributes are + prefixed by the contextName. For example, the properties file might contain: +

    + myContextName.fileName=/tmp/metrics.log
    + myContextName.period=5
    + 
    ]]> +
    + + + + +These are the implementation specific factory attributes +(See ContextFactory.getFactory()): + +
    +
    contextName.fileName
    +
    The path of the file to which metrics in context contextName + are to be appended. If this attribute is not specified, the metrics + are written to standard output by default.
    + +
    contextName.period
    +
    The period in seconds on which the metric data is written to the + file.
    + +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Implementation of the metrics package that sends metric data to +Ganglia. +Programmers should not normally need to use this package directly. Instead +they should use org.hadoop.metrics. + +

    +These are the implementation specific factory attributes +(See ContextFactory.getFactory()): + +

    +
    contextName.servers
    +
    Space and/or comma separated sequence of servers to which UDP + messages should be sent.
    + +
    contextName.period
    +
    The period in seconds on which the metric data is sent to the + server(s).
    + +
    contextName.units.recordName.metricName
    +
    The units for the specified metric in the specified record.
    + +
    contextName.slope.recordName.metricName
    +
    The slope for the specified metric in the specified record.
    + +
    contextName.tmax.recordName.metricName
    +
    The tmax for the specified metric in the specified record.
    + +
    contextName.dmax.recordName.metricName
    +
    The dmax for the specified metric in the specified record.
    + +
    ]]> +
    +
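+For example, a hypothetical hadoop-metrics.properties fragment wiring a
+context named myContextName to Ganglia might look like the following (the
+gmond host is a placeholder; 8649 is Ganglia's conventional gmond port):
+
+ myContextName.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+ myContextName.servers=gmond-host:8649
+ myContextName.period=10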
    + + + + + + + + + + + + + + + + + + + + + + + + + + contextName.tableName. The returned map consists of + those attributes with the contextName and tableName stripped off.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class implements the internal table of metric data, and the timer + on which data is to be sent to the metrics system. Subclasses must + override the abstract emitRecord method in order to transmit + the data.

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + update + and remove().]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @return a list of InetSocketAddress objects.]]> + + + + + + + + + org.apache.hadoop.metrics.file and +org.apache.hadoop.metrics.ganglia.

    + +Plugging in an implementation involves writing a concrete subclass of +AbstractMetricsContext. The subclass should get its + configuration information using the getAttribute(attributeName) + method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = getCount(). + @param newCapacity The new capacity in bytes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Index idx = startVector(...); + while (!idx.done()) { + .... // read element of a vector + idx.incr(); + } + + + @deprecated Replaced by Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + (DEPRECATED) Hadoop record I/O contains classes and a record description language + translator for simplifying serialization and deserialization of records in a + language-neutral manner. +

    + +

    + DEPRECATED: Replaced by Avro. +

    + +

    Introduction

    + + Software systems of any significant complexity require mechanisms for data +interchange with the outside world. These interchanges typically involve the +marshaling and unmarshaling of logical units of data to and from data streams +(files, network connections, memory buffers etc.). Applications usually have +some code for serializing and deserializing the data types that they manipulate +embedded in them. The work of serialization has several features that make +automatic code generation for it worthwhile. Given a particular output encoding +(binary, XML, etc.), serialization of primitive types and simple compositions +of primitives (structs, vectors etc.) is a very mechanical task. Manually +written serialization code can be susceptible to bugs especially when records +have a large number of fields or a record definition changes between software +versions. Lastly, it can be very useful for applications written in different +programming languages to be able to share and interchange data. This can be +made a lot easier by describing the data records manipulated by these +applications in a language agnostic manner and using the descriptions to derive +implementations of serialization in multiple target languages. + +This document describes Hadoop Record I/O, a mechanism that is aimed +at +
      +
    • enabling the specification of simple serializable data types (records) +
    • enabling the generation of code in multiple target languages for +marshaling and unmarshaling such types +
    • providing target language specific support that will enable application +programmers to incorporate generated code into their applications +
    + +The goals of Hadoop Record I/O are similar to those of mechanisms such as XDR, +ASN.1, PADS and ICE. While these systems all include a DDL that enables +the specification of most record types, they differ widely in what else they +focus on. The focus in Hadoop Record I/O is on data marshaling and +multi-lingual support. We take a translator-based approach to serialization. +Hadoop users have to describe their data in a simple data description +language. The Hadoop DDL translator rcc generates code that users +can invoke in order to read/write their data from/to simple stream +abstractions. Next we list explicitly some of the goals and non-goals of +Hadoop Record I/O. + + +

    Goals

    + +
      +
    • Support for commonly used primitive types. Hadoop should include as +primitives commonly used builtin types from programming languages we intend to +support. + +
    • Support for common data compositions (including recursive compositions). +Hadoop should support widely used composite types such as structs and +vectors. + +
    • Code generation in multiple target languages. Hadoop should be capable of +generating serialization code in multiple target languages and should be +easily extensible to new target languages. The initial target languages are +C++ and Java. + +
• Support for generated target languages. Hadoop should include support +in the form of headers, libraries, and packages for supported target languages +that enable easy inclusion and use of generated code in applications. + +
    • Support for multiple output encodings. Candidates include +packed binary, comma-separated text, XML etc. + +
    • Support for specifying record types in a backwards/forwards compatible +manner. This will probably be in the form of support for optional fields in +records. This version of the document does not include a description of the +planned mechanism, we intend to include it in the next iteration. + +
    + +

    Non-Goals

    + +
      +
    • Serializing existing arbitrary C++ classes. +
    • Serializing complex data structures such as trees, linked lists etc. +
    • Built-in indexing schemes, compression, or check-sums. +
    • Dynamic construction of objects from an XML schema. +
    + +The remainder of this document describes the features of Hadoop record I/O +in more detail. Section 2 describes the data types supported by the system. +Section 3 lays out the DDL syntax with some examples of simple records. +Section 4 describes the process of code generation with rcc. Section 5 +describes target language mappings and support for Hadoop types. We include a +fairly complete description of C++ mappings with intent to include Java and +others in upcoming iterations of this document. The last section talks about +supported output encodings. + + +

    Data Types and Streams

    + +This section describes the primitive and composite types supported by Hadoop. +We aim to support a set of types that can be used to simply and efficiently +express a wide range of record types in different programming languages. + +

    Primitive Types

    + +For the most part, the primitive types of Hadoop map directly to primitive +types in high level programming languages. Special cases are the +ustring (a Unicode string) and buffer types, which we believe +find wide use and which are usually implemented in library code and not +available as language built-ins. Hadoop also supplies these via library code +when a target language built-in is not present and there is no widely +adopted "standard" implementation. The complete list of primitive types is: + +
      +
    • byte: An 8-bit unsigned integer. +
    • boolean: A boolean value. +
    • int: A 32-bit signed integer. +
    • long: A 64-bit signed integer. +
    • float: A single precision floating point number as described by + IEEE-754. +
    • double: A double precision floating point number as described by + IEEE-754. +
    • ustring: A string consisting of Unicode characters. +
    • buffer: An arbitrary sequence of bytes. +
    + + +

    Composite Types

+Hadoop supports a small set of composite types that enable the description +of simple aggregate types and containers. A composite type is serialized +by sequentially serializing its constituent elements. The supported +composite types are: +
      + +
    • record: An aggregate type like a C-struct. This is a list of +typed fields that are together considered a single unit of data. A record +is serialized by sequentially serializing its constituent fields. In addition +to serialization a record has comparison operations (equality and less-than) +implemented for it, these are defined as memberwise comparisons. + +
    • vector: A sequence of entries of the same data type, primitive +or composite. + +
    • map: An associative container mapping instances of a key type to +instances of a value type. The key and value types may themselves be primitive +or composite types. + +
    + +

    Streams

    + +Hadoop generates code for serializing and deserializing record types to +abstract streams. For each target language Hadoop defines very simple input +and output stream interfaces. Application writers can usually develop +concrete implementations of these by putting a one method wrapper around +an existing stream implementation. + + +

    DDL Syntax and Examples

    + +We now describe the syntax of the Hadoop data description language. This is +followed by a few examples of DDL usage. + +

    Hadoop DDL Syntax

    + +
    
    +recfile = *include module *record
    +include = "include" path
    +path = (relative-path / absolute-path)
    +module = "module" module-name
    +module-name = name *("." name)
    +record := "class" name "{" 1*(field) "}"
    +field := type name ";"
    +name :=  ALPHA (ALPHA / DIGIT / "_" )*
    +type := (ptype / ctype)
+ptype := ("byte" / "boolean" / "int" /
+          "long" / "float" / "double" /
+          "ustring" / "buffer")
+ctype := (("vector" "<" type ">") /
+          ("map" "<" type "," type ">" )) / name
    +
    + +A DDL file describes one or more record types. It begins with zero or +more include declarations, a single mandatory module declaration +followed by zero or more class declarations. The semantics of each of +these declarations are described below: + +
      + +
    • include: An include declaration specifies a DDL file to be +referenced when generating code for types in the current DDL file. Record types +in the current compilation unit may refer to types in all included files. +File inclusion is recursive. An include does not trigger code +generation for the referenced file. + +
    • module: Every Hadoop DDL file must have a single module +declaration that follows the list of includes and precedes all record +declarations. A module declaration identifies a scope within which +the names of all types in the current file are visible. Module names are +mapped to C++ namespaces, Java packages etc. in generated code. + +
    • class: Records types are specified through class +declarations. A class declaration is like a Java class declaration. +It specifies a named record type and a list of fields that constitute records +of the type. Usage is illustrated in the following examples. + +
    + +

    Examples

    + +
      +
    • A simple DDL file links.jr with just one record declaration. +
      
      +module links {
      +    class Link {
      +        ustring URL;
      +        boolean isRelative;
      +        ustring anchorText;
      +    };
      +}
      +
      + +
    • A DDL file outlinks.jr which includes another +
      
      +include "links.jr"
      +
      +module outlinks {
      +    class OutLinks {
      +        ustring baseURL;
      +        vector outLinks;
      +    };
      +}
      +
      +
    + +

    Code Generation

+ +The Hadoop translator is written in Java. Invocation is done by executing a +wrapper shell script named rcc. It takes a list of +record description files as a mandatory argument and an +optional language argument (the default is Java), --language or +-l. Thus a typical invocation would look like: +
    
    +$ rcc -l C++  ...
    +
    + + +

    Target Language Mappings and Support

    + +For all target languages, the unit of code generation is a record type. +For each record type, Hadoop generates code for serialization and +deserialization, record comparison and access to record members. + +

    C++

    + +Support for including Hadoop generated C++ code in applications comes in the +form of a header file recordio.hh which needs to be included in source +that uses Hadoop types and a library librecordio.a which applications need +to be linked with. The header declares the Hadoop C++ namespace which defines +appropriate types for the various primitives, the basic interfaces for +records and streams and enumerates the supported serialization encodings. +Declarations of these interfaces and a description of their semantics follow: + +
    
    +namespace hadoop {
    +
    +  enum RecFormat { kBinary, kXML, kCSV };
    +
    +  class InStream {
    +  public:
    +    virtual ssize_t read(void *buf, size_t n) = 0;
    +  };
    +
    +  class OutStream {
    +  public:
    +    virtual ssize_t write(const void *buf, size_t n) = 0;
    +  };
    +
    +  class IOError : public runtime_error {
    +  public:
    +    explicit IOError(const std::string& msg);
    +  };
    +
    +  class IArchive;
    +  class OArchive;
    +
    +  class RecordReader {
    +  public:
    +    RecordReader(InStream& in, RecFormat fmt);
    +    virtual ~RecordReader(void);
    +
    +    virtual void read(Record& rec);
    +  };
    +
    +  class RecordWriter {
    +  public:
    +    RecordWriter(OutStream& out, RecFormat fmt);
    +    virtual ~RecordWriter(void);
    +
    +    virtual void write(Record& rec);
    +  };
    +
    +
    +  class Record {
    +  public:
    +    virtual std::string type(void) const = 0;
    +    virtual std::string signature(void) const = 0;
    +  protected:
    +    virtual bool validate(void) const = 0;
    +
    +    virtual void
    +    serialize(OArchive& oa, const std::string& tag) const = 0;
    +
    +    virtual void
    +    deserialize(IArchive& ia, const std::string& tag) = 0;
    +  };
    +}
    +
    + +
      + +
    • RecFormat: An enumeration of the serialization encodings supported +by this implementation of Hadoop. + +
    • InStream: A simple abstraction for an input stream. This has a +single public read method that reads n bytes from the stream into +the buffer buf. Has the same semantics as a blocking read system +call. Returns the number of bytes read or -1 if an error occurs. + +
    • OutStream: A simple abstraction for an output stream. This has a +single write method that writes n bytes to the stream from the +buffer buf. Has the same semantics as a blocking write system +call. Returns the number of bytes written or -1 if an error occurs. + +
    • RecordReader: A RecordReader reads records one at a time from +an underlying stream in a specified record format. The reader is instantiated +with a stream and a serialization format. It has a read method that +takes an instance of a record and deserializes the record from the stream. + +
    • RecordWriter: A RecordWriter writes records one at a +time to an underlying stream in a specified record format. The writer is +instantiated with a stream and a serialization format. It has a +write method that takes an instance of a record and serializes the +record to the stream. + +
    • Record: The base class for all generated record types. This has two +public methods type and signature that return the typename and the +type signature of the record. + +
+ +Two files are generated for each record file (note: not for each record). If a +record file is named "name.jr", the generated files are +"name.jr.cc" and "name.jr.hh" containing serialization +implementations and record type declarations respectively. + +For each record in the DDL file, the generated header file will contain a +class definition corresponding to the record type; method definitions for the +generated type will be present in the '.cc' file. The generated class will +inherit from the abstract class hadoop::Record. The DDL file's +module declaration determines the namespace the record belongs to. +Each '.' delimited token in the module declaration results in the +creation of a namespace. For instance, the declaration module docs.links +results in the creation of a docs namespace and a nested +docs::links namespace. In the preceding examples, the Link class +is placed in the links namespace. The header file corresponding to +the links.jr file will contain: +
    
    +namespace links {
    +  class Link : public hadoop::Record {
    +    // ....
    +  };
    +};
    +
+ +Each field within the record will cause the generation of a private member +declaration of the appropriate type in the class declaration, and one or more +accessor methods. The generated class will implement the serialize and +deserialize methods defined in hadoop::Record+. It will also +implement the inspection methods type and signature from +hadoop::Record. A default constructor and virtual destructor will also +be generated. Serialization code will read/write records into streams that +implement the hadoop::InStream and the hadoop::OutStream interfaces. + +For each member of a record an accessor method is generated that returns +either the member or a reference to the member. For members that are returned +by value, a setter method is also generated. This is true for primitive +data members of the types byte, int, long, boolean, float and +double. For example, for an int field called MyField the following +code is generated. +
    
    +...
    +private:
    +  int32_t mMyField;
    +  ...
    +public:
    +  int32_t getMyField(void) const {
    +    return mMyField;
    +  };
    +
    +  void setMyField(int32_t m) {
    +    mMyField = m;
    +  };
    +  ...
    +
+ +For a ustring, buffer or composite field, the generated code +only contains accessors that return a reference to the field. A const +and a non-const accessor are generated. For example: + +
    
    +...
    +private:
    +  std::string mMyBuf;
    +  ...
    +public:
    +
    +  std::string& getMyBuf() {
    +    return mMyBuf;
    +  };
    +
    +  const std::string& getMyBuf() const {
    +    return mMyBuf;
    +  };
    +  ...
    +
    + +

    Examples

    + +Suppose the inclrec.jr file contains: +
    
    +module inclrec {
    +    class RI {
    +        int      I32;
    +        double   D;
    +        ustring  S;
    +    };
    +}
    +
    + +and the testrec.jr file contains: + +
    
    +include "inclrec.jr"
    +module testrec {
    +    class R {
+        vector<float> VF;
    +        RI            Rec;
    +        buffer        Buf;
    +    };
    +}
    +
    + +Then the invocation of rcc such as: +
    
    +$ rcc -l c++ inclrec.jr testrec.jr
    +
    +will result in generation of four files: +inclrec.jr.{cc,hh} and testrec.jr.{cc,hh}. + +The inclrec.jr.hh will contain: + +
    
    +#ifndef _INCLREC_JR_HH_
    +#define _INCLREC_JR_HH_
    +
    +#include "recordio.hh"
    +
    +namespace inclrec {
    +  
    +  class RI : public hadoop::Record {
    +
    +  private:
    +
    +    int32_t      I32;
    +    double       D;
    +    std::string  S;
    +
    +  public:
    +
    +    RI(void);
    +    virtual ~RI(void);
    +
    +    virtual bool operator==(const RI& peer) const;
    +    virtual bool operator<(const RI& peer) const;
    +
    +    virtual int32_t getI32(void) const { return I32; }
    +    virtual void setI32(int32_t v) { I32 = v; }
    +
    +    virtual double getD(void) const { return D; }
    +    virtual void setD(double v) { D = v; }
    +
+    virtual std::string& getS(void) { return S; }
+    virtual const std::string& getS(void) const { return S; }
    +
    +    virtual std::string type(void) const;
    +    virtual std::string signature(void) const;
    +
    +  protected:
    +
    +    virtual void serialize(hadoop::OArchive& a) const;
    +    virtual void deserialize(hadoop::IArchive& a);
    +  };
    +} // end namespace inclrec
    +
    +#endif /* _INCLREC_JR_HH_ */
    +
    +
    + +The testrec.jr.hh file will contain: + + +
    
    +
    +#ifndef _TESTREC_JR_HH_
    +#define _TESTREC_JR_HH_
    +
    +#include "inclrec.jr.hh"
    +
    +namespace testrec {
    +  class R : public hadoop::Record {
    +
    +  private:
    +
+    std::vector<float> VF;
    +    inclrec::RI        Rec;
    +    std::string        Buf;
    +
    +  public:
    +
    +    R(void);
    +    virtual ~R(void);
    +
    +    virtual bool operator==(const R& peer) const;
    +    virtual bool operator<(const R& peer) const;
    +
+    virtual std::vector<float>& getVF(void);
+    virtual const std::vector<float>& getVF(void) const;
+
+    virtual std::string& getBuf(void);
+    virtual const std::string& getBuf(void) const;
+
+    virtual inclrec::RI& getRec(void);
+    virtual const inclrec::RI& getRec(void) const;
    +    
+    virtual void serialize(hadoop::OArchive& a) const;
+    virtual void deserialize(hadoop::IArchive& a);
    +    
    +    virtual std::string type(void) const;
    +    virtual std::string signature(void) const;
    +  };
    +}; // end namespace testrec
    +#endif /* _TESTREC_JR_HH_ */
    +
    +
    + +

    Java

+ +Code generation for Java is similar to that for C++. A Java class is generated +for each record type with private members corresponding to the fields. Getters +and setters for fields are also generated. Some differences arise in the +way comparison is expressed and in the mapping of modules to packages and +classes to files. For equality testing, an equals method is generated +for each record type. As per Java requirements, a hashCode method is also +generated. For comparison, a compareTo method is generated for each +record type. This has the semantics defined by the Java Comparable +interface, that is, the method returns a negative integer, zero, or a positive +integer as the invoked object is less than, equal to, or greater than the +comparison parameter. + +A .java file is generated per record type as opposed to per DDL +file as in C++. The module declaration translates to a Java +package declaration. The module name maps to an identical Java package +name. In addition to this mapping, the DDL compiler creates the appropriate +directory hierarchy for the package and places the generated .java +files in the correct directories. + +
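To make the shape of the generated Java code concrete, the following sketch shows roughly what a generated record for the earlier module links { class Link { ... } } example might look like. It is an illustration only, not actual rcc output: the single URL field is an assumption, and the real generated class also extends org.apache.hadoop.record.Record and implements serialize, deserialize, type and signature, which are omitted here.

package links;

// Illustrative sketch only; a hypothetical single ustring field "URL" is assumed.
public class Link implements Comparable<Link> {

  private String URL;                       // ustring maps to java.lang.String

  public String getURL() { return URL; }
  public void setURL(String v) { this.URL = v; }

  // Comparable semantics: negative, zero or positive as this is less than,
  // equal to, or greater than the peer.
  @Override
  public int compareTo(Link peer) {
    return URL.compareTo(peer.URL);
  }

  @Override
  public boolean equals(Object o) {
    return (o instanceof Link) && URL.equals(((Link) o).URL);
  }

  @Override
  public int hashCode() {
    return URL.hashCode();
  }
}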

    Mapping Summary

    + +
    
    +DDL Type        C++ Type            Java Type 
    +
    +boolean         bool                boolean
    +byte            int8_t              byte
    +int             int32_t             int
    +long            int64_t             long
    +float           float               float
    +double          double              double
    +ustring         std::string         java.lang.String
    +buffer          std::string         org.apache.hadoop.record.Buffer
    +class type      class type          class type
+vector<type>    std::vector<type>   java.util.ArrayList<type>
+map<type,type>  std::map<type,type> java.util.TreeMap<type,type>
    +
    + +

    Data encodings

    + +This section describes the format of the data encodings supported by Hadoop. +Currently, three data encodings are supported, namely binary, CSV and XML. + +

    Binary Serialization Format

    + +The binary data encoding format is fairly dense. Serialization of composite +types is simply defined as a concatenation of serializations of the constituent +elements (lengths are included in vectors and maps). + +Composite types are serialized as follows: +
      +
    • class: Sequence of serialized members. +
    • vector: The number of elements serialized as an int. Followed by a +sequence of serialized elements. +
    • map: The number of key value pairs serialized as an int. Followed +by a sequence of serialized (key,value) pairs. +
    + +Serialization of primitives is more interesting, with a zero compression +optimization for integral types and normalization to UTF-8 for strings. +Primitive types are serialized as follows: + +
      +
    • byte: Represented by 1 byte, as is. +
    • boolean: Represented by 1-byte (0 or 1) +
• int/long: Integers and longs are serialized zero-compressed. +Represented as 1 byte if -120 <= value < 128. Otherwise, serialized as a +sequence of 2-5 bytes for ints, 2-9 bytes for longs. The first byte represents +the number of trailing bytes, N, as the negative number (-120-N). For example, +the number 1024 (0x400) is represented by the byte sequence 'x86 x04 x00'. +This doesn't help much for 4-byte integers but does a reasonably good job with +longs without bit twiddling. A short sketch of this encoding follows this list. +
    • float/double: Serialized in IEEE 754 single and double precision +format in network byte order. This is the format used by Java. +
    • ustring: Serialized as 4-byte zero compressed length followed by +data encoded as UTF-8. Strings are normalized to UTF-8 regardless of native +language representation. +
    • buffer: Serialized as a 4-byte zero compressed length followed by the +raw bytes in the buffer. +
    + + +
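As referenced in the int/long item above, here is a minimal Java sketch of the zero-compression rule. The class and method names are made up and wide negative values are omitted for brevity; this illustrates the encoding as described, not the library's own writer.

// Hypothetical helper illustrating the zero-compressed integer encoding
// described above (not the record I/O library's own implementation).
public final class ZeroCompress {

  static byte[] encode(long value) {
    if (value >= -120 && value < 128) {
      return new byte[] { (byte) value };            // small values: one byte, as is
    }
    if (value < 0) {
      throw new IllegalArgumentException("wide negative values omitted from this sketch");
    }
    int n = 0;                                       // number of trailing payload bytes
    for (long v = value; v != 0; v >>>= 8) {
      n++;
    }
    byte[] out = new byte[n + 1];
    out[0] = (byte) (-120 - n);                      // marker byte: -(120 + N)
    for (int i = n; i >= 1; i--) {                   // big-endian payload
      out[i] = (byte) (value & 0xFF);
      value >>>= 8;
    }
    return out;
  }

  public static void main(String[] args) {
    // 1024 (0x400) encodes to 0x86 0x04 0x00, matching the example in the text.
    for (byte b : encode(1024)) {
      System.out.printf("%02x ", b);
    }
  }
}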

    CSV Serialization Format

    + +The CSV serialization format has a lot more structure than the "standard" +Excel CSV format, but we believe the additional structure is useful because + +
      +
    • it makes parsing a lot easier without detracting too much from legibility +
    • the delimiters around composites make it obvious when one is reading a +sequence of Hadoop records +
+ +Serialization formats for the various types are detailed in the grammar that +follows. The notable feature of the formats is the use of delimiters to +indicate certain field types. + +
      +
    • A string field begins with a single quote ('). +
    • A buffer field begins with a sharp (#). +
    • A class, vector or map begins with 's{', 'v{' or 'm{' respectively and +ends with '}'. +
    + +The CSV format can be described by the following grammar: + +
    
    +record = primitive / struct / vector / map
    +primitive = boolean / int / long / float / double / ustring / buffer
    +
    +boolean = "T" / "F"
    +int = ["-"] 1*DIGIT
    +long = ";" ["-"] 1*DIGIT
    +float = ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
    +double = ";" ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
    +
    +ustring = "'" *(UTF8 char except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
    +
    +buffer = "#" *(BYTE except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
    +
    +struct = "s{" record *("," record) "}"
    +vector = "v{" [record *("," record)] "}"
    +map = "m{" [*(record "," record)] "}"
    +
    + +
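As a concrete reading of the ustring production in the grammar above, the escaping amounts to a leading single quote plus percent-escaping of NUL, LF, '%' and ','. The Java helper below is hypothetical and is not the library's own CSV writer; it only illustrates that one rule.

// Hypothetical helper illustrating the ustring production of the CSV grammar above.
public final class CsvUstring {

  static String escape(String s) {
    StringBuilder sb = new StringBuilder("'");       // ustring fields start with a single quote
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      switch (c) {
        case '\0': sb.append("%00"); break;          // NUL
        case '\n': sb.append("%0a"); break;          // LF
        case '%':  sb.append("%25"); break;
        case ',':  sb.append("%2c"); break;
        default:   sb.append(c);
      }
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    System.out.println(escape("a,b%c"));             // prints 'a%2cb%25c
  }
}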

    XML Serialization Format

+ +The XML serialization format is the same as that used by Apache XML-RPC +(http://ws.apache.org/xmlrpc/types.html). This is an extension of the original +XML-RPC format and adds some additional data types. Not all record I/O types are +directly expressible in this format, and access to a DDL is required in +order to convert these to valid types. All types, primitive or composite, are +represented by <value> elements. The particular XML-RPC type is +indicated by a nested element in the <value> element. The encoding for +records is always UTF-8. Primitive types are serialized as follows: + +
      +
    • byte: XML tag <ex:i1>. Values: 1-byte unsigned +integers represented in US-ASCII +
    • boolean: XML tag <boolean>. Values: "0" or "1" +
    • int: XML tags <i4> or <int>. Values: 4-byte +signed integers represented in US-ASCII. +
    • long: XML tag <ex:i8>. Values: 8-byte signed integers +represented in US-ASCII. +
    • float: XML tag <ex:float>. Values: Single precision +floating point numbers represented in US-ASCII. +
    • double: XML tag <double>. Values: Double precision +floating point numbers represented in US-ASCII. +
• ustring: XML tag <string>. Values: String values +represented as UTF-8. XML does not permit all Unicode characters in literal +data. In particular, NULLs and control chars are not allowed. Additionally, +XML processors are required to replace carriage returns with line feeds and to +replace CRLF sequences with line feeds. Programming languages that we work +with do not impose these restrictions on string types. To work around these +restrictions, disallowed characters and CRs are percent escaped in strings. +The '%' character is also percent escaped. +
• buffer: XML tag <string>. Values: Arbitrary binary +data. Represented as hexBinary, each byte is replaced by its 2-byte +hexadecimal representation. +
    + +Composite types are serialized as follows: + +
      +
    • class: XML tag <struct>. A struct is a sequence of +<member> elements. Each <member> element has a <name> +element and a <value> element. The <name> is a string that must +match /[a-zA-Z][a-zA-Z0-9_]*/. The value of the member is represented +by a <value> element. + +
• vector: XML tag <array>. An <array> contains a +single <data> element. The <data> element is a sequence of +<value> elements each of which represents an element of the vector. + +
    • map: XML tag <array>. Same as vector. + +
    + +For example: + +
    
    +class {
    +  int           MY_INT;            // value 5
+  vector<float> MY_VEC;            // values 0.1, -0.89, 2.45e4
    +  buffer        MY_BUF;            // value '\00\n\tabc%'
    +}
    +
    + +is serialized as + +
    
    +<value>
    +  <struct>
    +    <member>
    +      <name>MY_INT</name>
    +      <value><i4>5</i4></value>
    +    </member>
    +    <member>
    +      <name>MY_VEC</name>
    +      <value>
    +        <array>
    +          <data>
    +            <value><ex:float>0.1</ex:float></value>
    +            <value><ex:float>-0.89</ex:float></value>
    +            <value><ex:float>2.45e4</ex:float></value>
    +          </data>
    +        </array>
    +      </value>
    +    </member>
    +    <member>
    +      <name>MY_BUF</name>
    +      <value><string>%00\n\tabc%25</string></value>
    +    </member>
    +  </struct>
    +</value> 
    +
    ]]> +
    +
    + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + + + + Avro.]]> + + + + + + + + + + + + Avro.]]> + + + + + + (DEPRECATED) This package contains classes needed for code generation + from the hadoop record compiler. CppGenerator and JavaGenerator + are the main entry points from the parser. There are classes + corrsponding to every primitive type and compound type + included in Hadoop record I/O syntax. +

    + +

    + DEPRECATED: Replaced by Avro. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This task takes the given record definition files and compiles them into + java or c++ + files. It is then up to the user to compile the generated files. + +

The task requires the file or the nested fileset element to be + specified. Optional attributes are language (set the output + language, default is "java"), + destdir (name of the destination directory for generated java/c++ + code, default is ".") and failonerror (specifies error handling + behavior; default is true). +

    Usage

    +
    + <recordcc
    +       destdir="${basedir}/gensrc"
    +       language="java">
    +   <fileset include="**\/*.jr" />
    + </recordcc>
    + 
    + + @deprecated Replaced by Avro.]]> +
    +
    + +
    + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + (DEPRECATED) This package contains code generated by JavaCC from the + Hadoop record syntax file rcc.jj. For details about the + record file syntax please @see org.apache.hadoop.record. +

    + +

    + DEPRECATED: Replaced by Avro. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + Avro.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the static class to find + @param the parent class of the array + @param cls the dynamic class to find + @param opts the list of options to look through + @return the first option that matches + @throws IOException]]> + + + + + + + the type of options + @param oldOpts the old options + @param newOpts the new options + @return a new array of options]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take an insignificant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occured and time-out the operation.

    ]]> +
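As a rough illustration of the Progressable contract described above, a long-running operation can call progress() periodically so the framework does not assume it has hung. The copy helper below is hypothetical; only the org.apache.hadoop.util.Progressable interface is assumed from these docs.

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.util.Progressable;

// Hypothetical helper: streams data while explicitly reporting liveness.
public final class ProgressingCopy {
  public static void copy(InputStream in, OutputStream out, Progressable progress)
      throws IOException {
    byte[] buf = new byte[64 * 1024];
    int n;
    while ((n = in.read(buf)) > 0) {
      out.write(buf, 0, n);
      progress.progress();   // explicit progress report; prevents a framework time-out
    }
  }
}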
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShellCommandExecutorshould be used in cases where the output + of the command needs no explicit parsing and where the command, working + directory and the environment remains unchanged. The output of the command + is stored as-is and is expected to be small.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyMapper.class);
    +         job.setReducerClass(MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
    +         JobClient.runJob(job);
    +         return 0;
    +       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
    +         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> +
    +
    + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + +
    + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bloom filter, as defined by Bloom in 1970. +

    + The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + the networking research community in the past decade thanks to the bandwidth efficiencies that it + offers for the transmission of set membership information between networked hosts. A sender encodes + the information into a bit vector, the Bloom filter, that is more compact than a conventional + representation. Computation and space costs for construction are linear in the number of elements. + The receiver uses the filter to test whether various elements are members of the set. Though the + filter will occasionally return a false positive, it will never return a false negative. When creating + the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]> + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this counting Bloom filter. +

    + Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + @param key The key to remove.]]> + + + + + + + + + + + + key -> count map. +

    NOTE: due to the bucket size of this filter, inserting the same + key more than 15 times will cause an overflow at all filter positions + associated with this key, and it will significantly increase the error + rate for this and other keys. For this reason the filter can only be + used to store small count values 0 <= N << 15. + @param key key to be tested + @return 0 if the key is not present. Otherwise, a positive value v will + be returned such that v == count with probability equal to the + error rate of this filter, and v > count otherwise. + Additionally, if the filter experienced an underflow as a result of + {@link #delete(Key)} operation, the return value may be lower than the + count with the probability of the false negative rate of such + filter.]]> + + + + + + + + + + + + + + + + + + + + + + counting Bloom filter, as defined by Fan et al. in a ToN + 2000 paper. +

+ A counting Bloom filter is an improvement to a standard Bloom filter as it + allows dynamic additions and deletions of set membership information. This + is achieved through the use of a counting vector instead of a bit vector. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Summary cache: a scalable wide-area web cache sharing protocol]]> + + + + + + + + + + + + + + Builds an empty Dynamic Bloom filter. + @param vectorSize The number of bits in the vector. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}). + @param nr The threshold for the maximum number of keys to record in a + dynamic Bloom filter row.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + dynamic Bloom filter, as defined in the INFOCOM 2006 paper. +

+ A dynamic Bloom filter (DBF) makes use of an s * m bit matrix but + each of the s rows is a standard Bloom filter. The creation + process of a DBF is iterative. At the start, the DBF is a 1 * m + bit matrix, i.e., it is composed of a single standard Bloom filter. + It assumes that nr elements are recorded in the + initial bit vector, where nr <= n (n is + the cardinality of the set A to record in the filter). +

    + As the size of A grows during the execution of the application, + several keys must be inserted in the DBF. When inserting a key into the DBF, + one must first get an active Bloom filter in the matrix. A Bloom filter is + active when the number of recorded keys, nr, is + strictly less than the current cardinality of A, n. + If an active Bloom filter is found, the key is inserted and + nr is incremented by one. On the other hand, if there + is no active Bloom filter, a new one is created (i.e., a new row is added to + the matrix) according to the current size of A and the element + is added in this new Bloom filter and the nr value of + this new Bloom filter is set to one. A given key is said to belong to the + DBF if the k positions are set to one in one of the matrix rows. +
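The insertion rule just described can be sketched directly. The class below is a hypothetical illustration of the row-growing behaviour, not the DynamicBloomFilter implementation itself; the sizes are arbitrary and standard Bloom filters are simply reused as rows.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.util.bloom.BloomFilter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;

// Hypothetical sketch of the dynamic Bloom filter insertion rule described above.
public final class DbfSketch {
  private static final int VECTOR_SIZE = 1 << 16;  // bits per row (illustrative)
  private static final int NB_HASH = 4;            // hash functions per row (illustrative)
  private final int nr;                            // max keys recorded per row
  private final List<BloomFilter> rows = new ArrayList<BloomFilter>();
  private final List<Integer> counts = new ArrayList<Integer>();

  public DbfSketch(int nr) { this.nr = nr; }

  public void add(Key key) {
    for (int i = 0; i < rows.size(); i++) {
      if (counts.get(i) < nr) {                    // found an active row
        rows.get(i).add(key);
        counts.set(i, counts.get(i) + 1);
        return;
      }
    }
    BloomFilter row = new BloomFilter(VECTOR_SIZE, NB_HASH, Hash.MURMUR_HASH);
    row.add(key);                                  // no active row: grow the matrix
    rows.add(row);
    counts.add(1);
  }

  public boolean membershipTest(Key key) {         // member if any row reports the key
    for (BloomFilter row : rows) {
      if (row.membershipTest(key)) {
        return true;
      }
    }
    return false;
  }
}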

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + + @see Theory and Network Applications of Dynamic Bloom Filters]]> + + + + + + + + + Builds a hash function that must obey to a given maximum number of returned values and a highest value. + @param maxValue The maximum highest returned value. + @param nbHash The number of resulting hashed values. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + this hash function. A NOOP]]> + + + + + + + + + + + + + + + + + + + The idea is to randomly select a bit to reset.]]> + + + + + + The idea is to select the bit to reset that will generate the minimum + number of false negative.]]> + + + + + + The idea is to select the bit to reset that will remove the maximum number + of false positive.]]> + + + + + + The idea is to select the bit to reset that will, at the same time, remove + the maximum number of false positve while minimizing the amount of false + negative generated.]]> + + + + + Originally created by + European Commission One-Lab Project 034819.]]> + + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this retouched Bloom filter. +

    + Invariant: if the false positive is null, nothing happens. + @param key The false positive key to add.]]> + + + + + + this retouched Bloom filter. + @param coll The collection of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The list of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The array of false positive.]]> + + + + + + + this retouched Bloom filter. + @param scheme The selective clearing scheme to apply.]]> + + + + + + + + + + + + retouched Bloom filter, as defined in the CoNEXT 2006 paper. +

    + It allows the removal of selected false positives at the cost of introducing + random false negatives, and with the benefit of eliminating some random false + positives at the same time. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + @see RemoveScheme The different selective clearing algorithms + + @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]> + + + + + + + + diff --git a/aarch64/share/hadoop/common/jdiff/hadoop_0.17.0.xml b/aarch64/share/hadoop/common/jdiff/hadoop_0.17.0.xml new file mode 100644 index 0000000..69dded3 --- /dev/null +++ b/aarch64/share/hadoop/common/jdiff/hadoop_0.17.0.xml @@ -0,0 +1,43272 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + name property, null if + no such property exists. + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + name property, without doing + variable expansion. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + + value of the name property. + + @param name property name. + @param value property value.]]> + + + + + + + name property. If no such property + exists, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, or if the specified value is not a valid + int, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as an int, + or defaultValue.]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property is specified, or if the specified value is not a valid + long, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property is specified, or if the specified value is not a valid + float, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a float, + or defaultValue.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + name property as + an array of Strings. 
+ If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Hadoop by default specifies two resources, loaded in-order from the + classpath:

      +
    1. hadoop-default.xml + : Read-only defaults for hadoop.
    2. +
    3. hadoop-site.xml: Site-specific configuration for a given hadoop + installation.
    4. +
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    +  <property>
    +    <name>dfs.client.buffer.dir</name>
    +    <value>/tmp/hadoop/dfs/client</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + hadoop-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
    1. Other properties defined in this Configuration; and, if a name is + undefined here,
    2. +
    3. Properties in {@link System#getProperties()}.
    4. +
    + +

    For example, if a configuration resource contains the following property + definitions: +

    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    + + When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The balancer is a tool that balances disk space usage on an HDFS cluster + when some datanodes become full or when new empty nodes join the cluster. + The tool is deployed as an application program that can be run by the + cluster administrator on a live HDFS cluster while applications + adding and deleting files. + +

    SYNOPSIS +

    + To start:
+      bin/start-balancer.sh [-threshold <threshold>]
+      Example: bin/start-balancer.sh
+                     start the balancer with a default threshold of 10%
+               bin/start-balancer.sh -threshold 5
+                     start the balancer with a threshold of 5%
+ To stop:
+      bin/stop-balancer.sh
    + 
    + +

    DESCRIPTION +

The threshold parameter is a fraction in the range of (0%, 100%) with a + default value of 10%. The threshold sets a target for whether the cluster + is balanced. A cluster is balanced if, for each datanode, the utilization + of the node (ratio of used space at the node to total capacity of the node) + differs from the utilization of the cluster (ratio of used space in the cluster + to total capacity of the cluster) by no more than the threshold value. + The smaller the threshold, the more balanced a cluster will become. + It takes more time to run the balancer for small threshold values. + Also, for a very small threshold the cluster may not be able to reach the + balanced state when applications write and delete files concurrently. + +

    The tool moves blocks from highly utilized datanodes to poorly + utilized datanodes iteratively. In each iteration a datanode moves or + receives no more than the lesser of 10G bytes or the threshold fraction + of its capacity. Each iteration runs no more than 20 minutes. + At the end of each iteration, the balancer obtains updated datanodes + information from the namenode. + +

    A system property that limits the balancer's use of bandwidth is + defined in the default configuration file: +

    + 
    +   dfs.balance.bandwidthPerSec
    +   1048576
    +   Specifies the maximum bandwidth that each datanode 
    + can utilize for the balancing purpose in term of the number of bytes 
    + per second. 
    + 
    + 
    + +

    This property determines the maximum speed at which a block will be + moved from one datanode to another. The default value is 1MB/s. The higher + the bandwidth, the faster a cluster can reach the balanced state, + but with greater competition with application processes. If an + administrator changes the value of this property in the configuration + file, the change is observed when HDFS is next restarted. + +

MONITORING BALANCER PROGRESS +

After the balancer is started, the name of an output file in which the balancer's + progress will be recorded is printed to the screen. The administrator + can monitor the running of the balancer by reading the output file. + The output shows the balancer's status iteration by iteration. In each + iteration it prints the starting time, the iteration number, the total + number of bytes that have been moved in the previous iterations, + the total number of bytes that are left to move in order for the cluster + to be balanced, and the number of bytes that are being moved in this + iteration. Normally "Bytes Already Moved" is increasing while "Bytes Left + To Move" is decreasing. + +

    Running multiple instances of the balancer in an HDFS cluster is + prohibited by the tool. + +

    The balancer automatically exits when any of the following five + conditions is satisfied: +

      +
    1. The cluster is balanced; +
    2. No block can be moved; +
    3. No block has been moved for five consecutive iterations; +
    4. An IOException occurs while communicating with the namenode; +
    5. Another balancer is running. +
    + +

Upon exit, a balancer returns an exit code and prints one of the + following messages to the output file, corresponding to the above exit + reasons: +

      +
    1. The cluster is balanced. Exiting +
    2. No block can be moved. Exiting... +
    3. No block has been moved for 3 iterations. Exiting... +
    4. Received an IO exception: failure reason. Exiting... +
    5. Another balancer is running. Exiting... +
    + +

    The administrator can interrupt the execution of the balancer at any + time by running the command "stop-balancer.sh" on the machine where the + balancer is running.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + in]]> + + + + + + + out.]]> + + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + stream of bytes (of BLOCK_SIZE or less) + + This info is stored on a local disk. The DataNode + reports the table's contents to the NameNode upon startup + and every so often afterwards. + + DataNodes spend their lives in an endless loop of asking + the NameNode for something to do. A NameNode cannot connect + to a DataNode directly; a NameNode simply returns values from + functions invoked by a DataNode. + + DataNodes maintain an open server socket so that client code + or other DataNodes can read/write data. The host/port for + this server is reported to the NameNode, which then sends that + information to clients or other DataNodes that might be interested.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The tool scans all files and directories, starting from an indicated + root path. The following abnormal conditions are detected and handled:

    +
      +
    • files with blocks that are completely missing from all datanodes.
      + In this case the tool can perform one of the following actions: +
        +
      • none ({@link NamenodeFsck#FIXING_NONE})
      • +
      • move corrupted files to /lost+found directory on DFS + ({@link NamenodeFsck#FIXING_MOVE}). Remaining data blocks are saved as a + block chains, representing longest consecutive series of valid blocks.
      • +
      • delete corrupted files ({@link NamenodeFsck#FIXING_DELETE})
      • +
      +
    • +
    • detect files with under-replicated or over-replicated blocks
    • +
    + Additionally, the tool collects a detailed overall DFS statistics, and + optionally can print detailed statistics on block locations and replication + factors of each file.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + :/data[/] HTTP/1.1 + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + :/listPaths[/][[&option]*] HTTP/1.1 + } + + Where option (default) in: + recursive ("no") + filter (".*") + exclude ("\..*\.crc") + + Response: A flat list of files/directories in the following format: + {@code + + + + + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The name-node can be started with one of the following startup options: +
      +
    • {@link FSConstants.StartupOption#REGULAR REGULAR} - normal startup
    • +
    • {@link FSConstants.StartupOption#FORMAT FORMAT} - format name node
    • +
    • {@link FSConstants.StartupOption#UPGRADE UPGRADE} - start the cluster + upgrade and create a snapshot of the current file system state
    • +
    • {@link FSConstants.StartupOption#ROLLBACK ROLLBACK} - roll the + cluster back to the previous state
    • +
The option is passed via the configuration field: + dfs.namenode.startup + + The conf will be modified to reflect the actual ports on which + the NameNode is up and running if the user passes the port as + zero in the conf. + + @param conf configuration + @throws IOException]]> +
    +
    + + + + zero.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + datanode whose + total size is size + + @param datanode on which blocks are located + @param size total size of blocks]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + blocksequence (namespace) + 2) block->machinelist ("inodes") + + The first table is stored on disk and is very precious. + The second table is rebuilt every time the NameNode comes + up. + + 'NameNode' refers to both this class as well as the 'NameNode server'. + The 'FSNamesystem' class actually performs most of the filesystem + management. The majority of the 'NameNode' class itself is concerned + with exposing the IPC interface to the outside world, plus some + configuration management. + + NameNode implements the ClientProtocol interface, which allows + clients to ask for DFS services. ClientProtocol is not + designed for direct use by authors of DFS client code. End-users + should instead use the org.apache.nutch.hadoop.fs.FileSystem class. + + NameNode also implements the DatanodeProtocol interface, used by + DataNode programs that actually store DFS data blocks. These + methods are invoked repeatedly and automatically by all the + DataNodes in a DFS deployment. + + NameNode also implements the NamenodeProtocol interface, used by + secondary namenodes or rebalancing processes to get partial namenode's + state, for example partial blocksMap etc.]]> + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The tool scans all files and directories, starting from an indicated + root path. The following abnormal conditions are detected and handled:

    +
      +
    • files with blocks that are completely missing from all datanodes.
      + In this case the tool can perform one of the following actions: +
        +
      • none ({@link #FIXING_NONE})
      • +
      • move corrupted files to /lost+found directory on DFS + ({@link #FIXING_MOVE}). Remaining data blocks are saved as a + block chains, representing longest consecutive series of valid blocks.
      • +
      • delete corrupted files ({@link #FIXING_DELETE})
      • +
      +
    • +
    • detect files with under-replicated or over-replicated blocks
    • +
    + Additionally, the tool collects a detailed overall DFS statistics, and + optionally can print detailed statistics on block locations and replication + factors of each file.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #syncs}.inc()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. This is loosely modelled after +Google's GFS.

    + +

    The most important difference is that unlike GFS, Hadoop DFS files +have strictly one writer at any one time. Bytes are always appended +to the end of the writer's stream. There is no notion of "record appends" +or "mutations" that are then checked or reordered. Writers simply emit +a byte stream. That byte stream is guaranteed to be stored in the +order written.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #blocksRead}.inc()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        dfs.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        dfs.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically. +

    + Name Node Status info is reported in another MBean + @see org.apache.hadoop.dfs.datanode.metrics.FSDatasetMBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Data Node runtime statistic info is report in another MBean + @see org.apache.hadoop.dfs.datanode.metrics.DataNodeStatisticsMBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Name Node runtime statistic info is report in another MBean + @see org.apache.hadoop.dfs.namenode.metrics.NameNodeStatisticsMBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        dfs.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        dfs.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically. +

    + Name Node Status info is report in another MBean + @see org.apache.hadoop.dfs.namenode.metrics.FSNamesystemMBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DistributedCache is a facility provided by the Map-Reduce + framework to cache files (text, archives, jars etc.) needed by applications. +

    + +

    Applications specify the files, via urls (hdfs:// or http://) to be cached + via the {@link JobConf}. The DistributedCache assumes that the + files specified via hdfs:// urls are already present on the + {@link FileSystem} at the path specified by the url.

    + +

    The framework will copy the necessary files on to the slave node before + any tasks for the job are executed on that node. Its efficiency stems from + the fact that the files are only copied once per job and the ability to + cache archives which are un-archived on the slaves.

    + +

DistributedCache can be used to distribute simple, read-only + data/text files and/or more complex types such as archives, jars etc. + Archives (zip files) are un-archived at the slave nodes. Jars may optionally be + added to the classpath of the tasks, a rudimentary software + distribution mechanism. Files have execution permissions. Optionally users + can also direct it to symlink the distributed cache file(s) into + the working directory of the task.

    + +

    DistributedCache tracks modification timestamps of the cache + files. Clearly the cache files should not be modified by the application + or externally while the job is executing.

    + +

    Here is an illustrative example on how to use the + DistributedCache:

    +

    +     // Setting up the cache for the application
    +     
    +     1. Copy the requisite files to the FileSystem:
    +     
    +     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
    +     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
    +     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
    +     
    +     2. Setup the application's JobConf:
    +     
    +     JobConf job = new JobConf();
    +     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
    +                                   job);
+     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
    +     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
    +
    +     3. Use the cached files in the {@link Mapper} or {@link Reducer}:
    +     
    +     public static class MapClass extends MapReduceBase  
    +     implements Mapper<K, V, K, V> {
    +     
    +       private Path[] localArchives;
    +       private Path[] localFiles;
    +       
    +       public void configure(JobConf job) {
    +         // Get the cached archives/files
    +         localArchives = DistributedCache.getLocalCacheArchives(job);
    +         localFiles = DistributedCache.getLocalCacheFiles(job);
    +       }
    +       
    +       public void map(K key, V value, 
    +                       OutputCollector<K, V> output, Reporter reporter) 
    +       throws IOException {
    +         // Use data from the cached archives/files here
    +         // ...
    +         // ...
+         output.collect(key, value);
    +       }
    +     }
    +     
    + 

    + + @see JobConf + @see JobClient]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BufferedFSInputStream + with the specified buffer size, + and saves its argument, the input stream + in, for later use. An internal + buffer array of length size + is created and stored in buf. + + @param in the underlying input stream. + @param size the buffer size. + @exception IllegalArgumentException if size <= 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ']]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f is a file, return the size of the file; + If f is a directory, return the size of the directory tree + @deprecated Use {@link #getContentSummary(Path)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    +
    +
    +

    +

    ? +
    Matches any single character. + +

    +

    * +
    Matches zero or more characters. + +

    +

    [abc] +
    Matches a single character from character set + {a,b,c}. + +

    +

    [a-b] +
    Matches a single character from the character range + {a...b}. Note that character a must be + lexicographically less than or equal to character b. + +

    +

    [^a] +
    Matches a single character that is not from character set or range + {a}. Note that the ^ character must occur + immediately to the right of the opening bracket. + +

    +

    \c +
    Removes (escapes) any special meaning of character c. + +

    +

    {ab,cd} +
    Matches a string from the string set {ab, cd} + +

    +

    {ab,c{de,fh}} +
    Matches a string from the string set {ab, cde, cfh} + +
    +
    +
+ + @param pathPattern a regular expression specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException]]> +
    +
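The glob syntax documented above maps directly onto the FileSystem glob call shipped in the 2.2.0 jars imported by this patch. A minimal, illustrative sketch (the path, pattern and file layout are assumptions of this example, not part of the original Javadoc):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class GlobExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // '*' matches zero or more characters, '[0-9]' a single digit,
        // '{dat,txt}' either extension - exactly the syntax described above.
        FileStatus[] matches = fs.globStatus(new Path("/myapp/part-[0-9]*.{dat,txt}"));
        if (matches != null) {
          for (FileStatus status : matches) {
            System.out.println(status.getPath());
          }
        }
      }
    }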
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object. The + Hadoop DFS is a multi-machine system that appears as a single + disk. It's useful because of its fault tolerance and potentially + very large capacity. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is {@link DistributedFileSystem}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + buf at offset + and checksum into checksum. + The method is used for implementing read, therefore, it should be optimized + for sequential reading + @param pos chunkPos + @param buf desitination buffer + @param offset offset in buf at which to store data + @param len maximun number of bytes to read + @return number of bytes read]]> + + + + + + + + + + + + + + + + + -1 if the end of the + stream is reached. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + This method implements the general contract of the corresponding + {@link InputStream#read(byte[], int, int) read} method of + the {@link InputStream} class. As an additional + convenience, it attempts to read as many bytes as possible by repeatedly + invoking the read method of the underlying stream. This + iterated read continues until one of the following + conditions becomes true:

      + +
    • The specified number of bytes have been read, + +
    • The read method of the underlying stream returns + -1, indicating end-of-file. + +
    If the first read on the underlying stream returns + -1 to indicate end-of-file then this method returns + -1. Otherwise this method returns the number of bytes + actually read. + + @param b destination buffer. + @param off offset at which to start storing bytes. + @param len maximum number of bytes to read. + @return the number of bytes read, or -1 if the end of + the stream has been reached. + @exception IOException if an I/O error occurs. + ChecksumException if any checksum error occurs]]> +
    + + + + + + + + + + + + n bytes of data from the + input stream. + +

    This method may skip more bytes than are remaining in the backing + file. This produces no exception and the number of bytes skipped + may include some number of bytes that were beyond the EOF of the + backing file. Attempting to read from the stream after skipping past + the end will result in -1 indicating the end of the file. + +

    If n is negative, no bytes are skipped. + + @param n the number of bytes to be skipped. + @return the actual number of bytes skipped. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to skip to is corrupted]]> + + + + + + + This method may seek past the end of the file. + This produces no exception and an attempt to read from + the stream will result in -1 indicating the end of the file. + + @param pos the postion to seek to. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to seek to is corrupted]]> + + + + + + + + + + len bytes from + stm + + @param stm an input stream + @param buf destiniation buffer + @param offset offset at which to store data + @param len number of bytes to read + @return actual number of bytes read + @throws IOException if there is any IO error]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + len bytes from the specified byte array + starting at offset off and generate a checksum for + each data chunk. + +

    This method stores bytes from the given array into this + stream's buffer before it gets checksumed. The buffer gets checksumed + and flushed to the underlying output stream when all data + in a checksum chunk are in the buffer. If the buffer is empty and + requested length is at least as large as the size of next checksum chunk + size, this method will checksum and write the chunk directly + to the underlying output stream. Thus it avoids uneccessary data copy. + + @param b the data. + @param off the start offset in the data. + @param len the number of bytes to write. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trash feature. Files are moved to a user's trash + directory, a subdirectory of their home directory named ".Trash". Files are + initially moved to a current sub-directory of the trash directory. + Within that sub-directory their original path is preserved. Periodically + one may checkpoint the current trash and remove older checkpoints. (This + design permits trash management without enumeration of the full trash + content, without date support in the filesystem, and without clock + synchronization.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A client for the Kosmos filesystem (KFS) + +

    Introduction

+ +This page describes how to use Kosmos Filesystem +( KFS ) as a backing +store with Hadoop. It assumes that you have downloaded the +KFS software and installed the necessary binaries as outlined in the KFS +documentation. + +

    Steps

    + +
      +
    • In the Hadoop conf directory edit hadoop-default.xml, + add the following: +
      +<property>
      +  <name>fs.kfs.impl</name>
      +  <value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
      +  <description>The FileSystem for kfs: uris.</description>
      +</property>
      +            
      + +
    • In the Hadoop conf directory edit hadoop-site.xml, + adding the following (with appropriate values for + <server> and <port>): +
      +<property>
      +  <name>fs.default.name</name>
      +  <value>kfs://<server:port></value> 
      +</property>
      +
      +<property>
      +  <name>fs.kfs.metaServerHost</name>
      +  <value><server></value>
      +  <description>The location of the KFS meta server.</description>
      +</property>
      +
      +<property>
      +  <name>fs.kfs.metaServerPort</name>
      +  <value><port></value>
      +  <description>The location of the meta server's port.</description>
      +</property>
      +
      +
      +
    • + +
• Copy KFS's kfs-0.1.jar to Hadoop's lib directory. This step + enables Hadoop to load the KFS-specific modules. Note + that kfs-0.1.jar was built when you compiled KFS source + code. This jar file contains code that calls KFS's client + library code via JNI; the native code is in KFS's + libkfsClient.so library. +
    • + +
    • When the Hadoop map/reduce trackers start up, those +processes (on local as well as remote nodes) will now need to load +KFS's libkfsClient.so library. To simplify this process, it is advisable to +store libkfsClient.so in an NFS accessible directory (similar to where +Hadoop binaries/scripts are stored); then, modify Hadoop's +conf/hadoop-env.sh adding the following line and providing suitable +value for <path>: +
      +export LD_LIBRARY_PATH=<path>
      +
      + + +
    • Start only the map/reduce trackers +
      + example: execute Hadoop's bin/start-mapred.sh
    • +
    +
    + +If the map/reduce job trackers start up, all file-I/O is done to KFS.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a tool for migrating data from an older to a newer version + of an S3 filesystem. +

    +

    + All files in the filesystem are migrated by re-writing the block metadata + - no datafiles are touched. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by Amazon S3. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem} that uses Amazon S3.

    + +

    +Files are stored in S3 as blocks (represented by +{@link org.apache.hadoop.fs.s3.Block}), which have an ID and a length. +Block metadata is stored in S3 as a small record (represented by +{@link org.apache.hadoop.fs.s3.INode}) using the URL-encoded +path string as a key. Inodes record the file type (regular file or directory) and the list of blocks. +This design makes it easy to seek to any given position in a file by reading the inode data to compute +which block to access, then using S3's support for +HTTP Range headers +to start streaming from the correct position. +Renames are also efficient since only the inode is moved (by a DELETE followed by a PUT since +S3 does not support renames). +

    +

    +For a single file /dir1/file1 which takes two blocks of storage, the file structure in S3 +would be something like this: +

    +
    +/
    +/dir1
    +/dir1/file1
    +block-6415776850131549260
    +block-3026438247347758425
    +
    +

    +Inodes start with a leading /, while blocks are prefixed with block-. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nth value.]]> + + + + + + + + + + + + + + + + + + + + + nth value in the file.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataInputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + DataInputBuffer buffer = new DataInputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using DataInput methods ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataOutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + DataOutputBuffer buffer = new DataOutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using DataOutput methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

+ Compared with ObjectWritable, this class is much more efficient, + because ObjectWritable appends the class declaration as a String + to the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines + the classes that will be wrapped in GenericObject in the application. + Attention: the classes defined in the getTypes() method must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new InputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + InputBuffer buffer = new InputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using InputStream methods ...
    + }
    + 
    + @see DataInputBuffer + @see DataOutput]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key and + val. Returns true if such a pair exists and false when at + the end of the map]]> + + + + + + + + + + + + + + + + key or if it does not exist, at the first entry + after the named key. + +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof.]]> + + + + + + + + + key does not exist, return + the first entry that falls just before the key. Otherwise, + return the record that sorts just after. + @return - the key that was the closest match or null if eof.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new OutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + OutputBuffer buffer = new OutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using OutputStream methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    + @see DataOutputBuffer + @see InputBuffer]]> +
    +
    + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link Writer}, {@link Reader} and + {@link Sorter} classes for writing, reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
1. + Writer : Uncompressed records. +
2. + RecordCompressWriter : Record-compressed files, only compress + values. +
3. + BlockCompressWriter : Block-compressed files, both keys & + values are collected in 'blocks' + separately and compressed. The size of + the 'block' is configurable. +
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

    The recommended way is to use the static createWriter methods + provided by the SequenceFile to chose the preferred format.

    + +

    The {@link Reader} acts as the bridge and can read any of the above + SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
• + version - 3 bytes of magic header SEQ, followed by 1 byte of actual + version number (e.g. SEQ4 or SEQ6) +
• + keyClassName - key class +
• + valueClassName - value class +
• + compression - A boolean which specifies if compression is turned on for + keys/values in this file. +
• + blockCompression - A boolean which specifies if block-compression is + turned on for keys/values in this file. +
• + compression codec - CompressionCodec class which is used for + compression of keys and/or values (if compression is + enabled). +
• + metadata - {@link Metadata} for this file. +
• + sync - A sync marker to denote end of the header. +
    + +
Uncompressed SequenceFile Format
+
• + Header +
• + Record +
  • Record length
  • Key length
  • Key
  • Value
• + A sync-marker every few 100 bytes or so. +
+
Record-Compressed SequenceFile Format
+
• + Header +
• + Record +
  • Record length
  • Key length
  • Key
  • Compressed Value
• + A sync-marker every few 100 bytes or so. +
+
Block-Compressed SequenceFile Format
+
• + Header +
• + Record Block +
  • Compressed key-lengths block-size
  • Compressed key-lengths block
  • Compressed keys block-size
  • Compressed keys block
  • Compressed value-lengths block-size
  • Compressed value-lengths block
  • Compressed values block-size
  • Compressed values block
• + A sync-marker every few 100 bytes or so. +
+

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
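As a concrete companion to the format description above, here is a hedged sketch that writes and reads back a small uncompressed SequenceFile using the static createWriter factory the Javadoc recommends. The FileSystem-based overload used here still exists (though deprecated) in the bundled 2.2.0 jars; the path and key/value types are illustrative assumptions.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;

    public class SequenceFileExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path file = new Path("/tmp/example.seq");

        // Writer: the key/value classes are recorded in the header described above.
        SequenceFile.Writer writer =
            SequenceFile.createWriter(fs, conf, file, Text.class, IntWritable.class);
        try {
          writer.append(new Text("apple"), new IntWritable(1));
          writer.append(new Text("banana"), new IntWritable(2));
        } finally {
          writer.close();
        }

        // Reader: works for any of the three formats (uncompressed, record- or block-compressed).
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
        try {
          Text key = new Text();
          IntWritable value = new IntWritable();
          while (reader.next(key, value)) {
            System.out.println(key + "\t" + value);
          }
        } finally {
          reader.close();
        }
      }
    }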
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key, skipping its + value. True if another entry exists, and false at end of file.]]> + + + + + + + + key and + val. Returns true if such a pair exists and false when at + end of file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The position passed must be a position returned by {@link + SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + position, use {@link SequenceFile.Reader#sync(long)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SegmentDescriptor + @param segments the list of SegmentDescriptors + @param tmpDir the directory to write temporary files into + @return RawKeyValueIterator + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For best performance, applications should make sure that the {@link + Writable#readFields(DataInput)} implementation of their keys is + very efficient. In particular, it should avoid allocating memory.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This always returns a synchronized position. In other words, + immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + the key may be earlier in the file than key last written when this + method was called (e.g., with block-compression, it may be the first key + in the block that was being written when this method was called).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key. Returns + true if such a key exists and false when at the end of the set.]]> + + + + + + + key. + Returns key, or null if no match exists.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instatiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. 
+ @return byte position of the first occurence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a UTF8 with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + Also includes utilities for efficiently reading and writing UTF-8. + + @deprecated replaced by Text]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

+ + @param in DataInput to deserialize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +

    +     public class MyWritable implements Writable {
    +       // Some data     
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 

    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Example:

    +

    +     public class MyWritableComparable implements WritableComparable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable w) {
+         int thisValue = this.counter;
+         int thatValue = w.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +     }
    + 

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implemenation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. + @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-lzo library is loaded & initialized; + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lzo compression/decompression pair. 
+ http://www.oberhumer.com/opensource/lzo/]]> + + + + + + + + + + + + + + + + + + + + + true if lzo compressors are loaded & initialized, + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if lzo decompressors are loaded & initialized, + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-zlib is loaded & initialized + and can be loaded for this job, else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Keep trying a limited number of times, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    + + + + + + + + Keep trying for a maximum time, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
    + + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime mutliplied by the number of tries so far. +

    ]]> +
    +
    + + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime mutliplied by a random + number in the range of [0, 2 to the number of retries) +

    ]]> +
    +
    + + + + + + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + + + A retry policy for RemoteException + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + Try once, and fail by re-throwing the exception. + This corresponds to having no retry mechanism in place. +

    ]]> +
    +
    + + + + Try once, and fail silently for void methods, or by + re-throwing the exception for non-void methods. +

    ]]> +
    +
    + + + + Keep trying forever. +

    ]]> +
    +
    + + + A collection of useful implementations of {@link RetryPolicy}. +

    ]]> +
    +
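To make the policy descriptions above concrete, here is a small illustrative sketch that combines them via RetryPolicies.retryByException: a default of TRY_ONCE_THEN_FAIL with a bounded, fixed-sleep retry for IOException. The choice of exception class and the 4 retries / 10 second figures are assumptions made for the example.

    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.concurrent.TimeUnit;
    import org.apache.hadoop.io.retry.RetryPolicies;
    import org.apache.hadoop.io.retry.RetryPolicy;

    public class RetryPolicyExample {
      public static void main(String[] args) {
        Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicy =
            new HashMap<Class<? extends Exception>, RetryPolicy>();
        // Retry IOExceptions up to 4 times, sleeping a fixed 10 seconds between attempts.
        exceptionToPolicy.put(IOException.class,
            RetryPolicies.retryUpToMaximumCountWithFixedSleep(4, 10, TimeUnit.SECONDS));
        // Everything else fails immediately.
        RetryPolicy policy =
            RetryPolicies.retryByException(RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicy);
        System.out.println("Composite retry policy created: " + policy);
      }
    }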
    + + + + + + + + + + Determines whether the framework should retry a + method for the given exception, and the number + of retries that have been made for that operation + so far. +

    + @param e The exception that caused the method to fail. + @param retries The number of times the method has been retried. + @return true if the method should be retried, + false if the method should not be retried + but shouldn't fail with an exception (only for void methods). + @throws Exception The re-thrown exception e indicating + that the method failed and should not be retried further.]]> +
    +
    + + + Specifies a policy for retrying method failures. + Implementations of this interface should be immutable. +

    ]]> +
    +
    + + + + + + + + + + + + Create a proxy for an interface of an implementation class + using the same retry policy for each method in the interface. +

+ @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param retryPolicy the policy for retrying method call failures + @return the retry proxy]]> +
    +
+ + + + + + + Create a proxy for an interface of an implementation class + using a set of retry policies specified by method name. + If no retry policy is defined for a method then a default of + {@link RetryPolicies#TRY_ONCE_THEN_FAIL} is used. +

    + @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param methodNameToPolicyMap a map of method names to retry policies + @return the retry proxy]]> +
    +
    + + + A factory for creating retry proxies. +

    ]]> +
    +
    + + + +A mechanism for selectively retrying methods that throw exceptions under certain circumstances. +

    + +

    +Typical usage is +

    + +
    +UnreliableImplementation unreliableImpl = new UnreliableImplementation();
    +UnreliableInterface unreliable = (UnreliableInterface)
    +  RetryProxy.create(UnreliableInterface.class, unreliableImpl,
    +    RetryPolicies.retryUpToMaximumCountWithFixedSleep(4, 10, TimeUnit.SECONDS));
    +unreliable.call();
    +
    + +

    +This will retry any method called on unreliable four times - in this case the call() +method - sleeping 10 seconds between +each retry. There are a number of {@link org.apache.hadoop.io.retry.RetryPolicies retry policies} +available, or you can implement a custom one by implementing {@link org.apache.hadoop.io.retry.RetryPolicy}. +It is also possible to specify retry policies on a +{@link org.apache.hadoop.io.retry.RetryProxy#create(Class, Object, Map) per-method basis}. +

    ]]> +
    +
    + + + + + + + + Prepare the deserializer for reading.

    ]]> +
    +
    + + + + + + Deserialize the next object from the underlying input stream. + If the object t is non-null then this deserializer + may set its internal state to the next object read from the input + stream. Otherwise, if the object t is null a new + deserialized object will be created. +

    + @return the deserialized object]]> +
    +
    + + + + Close the underlying input stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for deserializing objects of type from an + {@link InputStream}. +

    + +

    + Deserializers are stateful, but must not buffer the input since + other producers may read from the input between calls to + {@link #deserialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link Deserializer} to deserialize + the objects to be compared so that the standard {@link Comparator} can + be used to compare them. +

    +

    + One may optimize compare-intensive operations by using a custom + implementation of {@link RawComparator} that operates directly + on byte representations. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + Encapsulates a {@link Serializer}/{@link Deserializer} pair. +

    + @param ]]> +
    +
    + + + + + + + Serializations are found by reading the io.serializations + property from conf, which is a comma-delimited list of + classnames. +

    ]]> +
    +
    + + + + + + + + + + + + A factory for {@link Serialization}s. +

    ]]> +
    +
    + + + + + + + + Prepare the serializer for writing.

    ]]> +
    +
    + + + + + Serialize t to the underlying output stream.

    ]]> +
    +
    + + + + Close the underlying output stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for serializing objects of type to an + {@link OutputStream}. +

    + +

    + Serializers are stateful, but must not buffer the output since + other producers may write to the output between calls to + {@link #serialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + +This package provides a mechanism for using different serialization frameworks +in Hadoop. The property "io.serializations" defines a list of +{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create +{@link org.apache.hadoop.io.serializer.Serializer}s and +{@link org.apache.hadoop.io.serializer.Deserializer}s. +

    + +

    +To add a new serialization framework write an implementation of +{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the +"io.serializations" property. +

    ]]> +
    +
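A minimal sketch of the registration step described above. The serialization class names are real classes from this package in the bundled jars; whether to enable JavaSerialization alongside the default WritableSerialization is an application-level choice, not something the original Javadoc mandates.

    import org.apache.hadoop.conf.Configuration;

    public class SerializationConfig {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Comma-delimited list read by SerializationFactory; the first
        // Serialization whose accept() returns true for a class is used.
        conf.set("io.serializations",
            "org.apache.hadoop.io.serializer.WritableSerialization,"
          + "org.apache.hadoop.io.serializer.JavaSerialization");
        System.out.println(conf.get("io.serializations"));
      }
    }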
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + param, to the IPC server running at + address, returning the value. Throws exceptions if there are + network problems or if the remote code threw an exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unwraps any IOException. + + @param lookupTypes the desired exception class. + @return IOException, which is either the lookupClass exception or this.]]> + + + + + This unwraps any Throwable that has a constructor taking + a String as a parameter. + Otherwise it returns this. + + @return Throwable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + protocol is a Java interface. All parameters and return types must + be one of: + +
• a primitive type, boolean, byte, + char, short, int, long, + float, double, or void; or
• a {@link String}; or
• a {@link Writable}; or
• an array of the above types
    + + All methods in the protocol should throw only IOException. No field data of + the protocol instance is transmitted.]]> +
    +
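An illustrative protocol interface obeying the parameter and return-type rules listed above. The interface name, methods and version constant are invented for this example; only the rules themselves come from the Javadoc.

    import java.io.IOException;
    import org.apache.hadoop.io.Text;

    public interface EchoProtocol {
      // By Hadoop RPC convention a protocol carries a version constant.
      long versionID = 1L;

      // A Writable parameter and a Writable return type.
      Text echo(Text message) throws IOException;

      // Primitive parameters and return types are also permitted,
      // and every method declares only IOException.
      int add(int a, int b) throws IOException;
    }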
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + handlerCount determines + the number of handler threads that will be used to process calls.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #rpcDiscardedOps}.inc(time)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        rpc.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker, + as {@link JobTracker.State} + + @return the current state of the JobTracker.]]> + + + + + + + + + + + + ClusterStatus provides clients with information such as: +

      +
1. + Size of the cluster. +
2. + Task capacity of the cluster. +
3. + The number of currently running map & reduce tasks. +
4. + State of the JobTracker. +

    + +

    Clients can query for the latest ClusterStatus, via + {@link JobClient#getClusterStatus()}.

    + + @see JobClient]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If the retain time is zero jobs are not persisted. +

    + A daemon thread cleans up job info files older than the retain time +

    + The retain time can be set with the 'persist.jobstatus.hours' + configuration variable (it is in hours).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter can be of + any {@link Enum} type.

    + +

    Counters are bunched into {@link Group}s, each comprising of + counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Group of counters, comprising of counters from a particular + counter {@link Enum} class. + +

Group handles localization of the class name and the + counter names.

    ]]> +
    +
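To illustrate the enum-based counters described above, here is a hedged old-API sketch in which a map task defines its own counter group and increments it through the Reporter. The mapper class, enum and counter name are assumptions made for the example.

    import java.io.IOException;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;

    public class CountingMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, LongWritable> {

      // Each enum constant becomes a counter in a group named after the enum class.
      enum MyCounters { EMPTY_LINES }

      public void map(LongWritable key, Text value,
                      OutputCollector<Text, LongWritable> output, Reporter reporter)
          throws IOException {
        if (value.toString().trim().isEmpty()) {
          reporter.incrCounter(MyCounters.EMPTY_LINES, 1);
        } else {
          output.collect(value, new LongWritable(1));
        }
      }
    }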
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param fs the file system that the file is on + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides generic implementations of + {@link #validateInput(JobConf)} and {@link #getSplits(JobConf, int)}. + Implementations fo FileInputFormat can also override the + {@link #isSplitable(FileSystem, Path)} method to ensure input-files are + not split-up and are processed as a whole by {@link Mapper}s.]]> + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the taskid, say + task_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapred.output.dir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapred.output.dir}/_temporary/_${taskid} (only) + are promoted to ${mapred.output.dir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

The application-writer can take advantage of this by creating any + side-files required in ${mapred.work.output.dir} during execution + of the reduce-task, i.e. via {@link #getWorkOutputPath(JobConf)}, and the + framework will move them out similarly - thus the writer doesn't have to pick + unique paths per task-attempt.

    + +

    Note: the value of ${mapred.work.output.dir} during + execution of a particular task-attempt is actually + ${mapred.output.dir}/_temporary/_{$taskid}, and this value is + set by the map-reduce framework. So, just create any side-files in the + path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce + task to take advantage of this feature.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
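+ A minimal sketch (not from this patch) of a map task writing one side-file into the
+ per-attempt work directory; FileOutputFormat.getWorkOutputPath is the accessor
+ described above, while the field names and the file name are made up for the example:
+     private JobConf job;
+
+     public void configure(JobConf job) { this.job = job; }
+
+     public void map(K key, V val, OutputCollector<K, V> output, Reporter reporter)
+     throws IOException {
+       // ${mapred.work.output.dir} for this task-attempt; files created here are
+       // promoted to ${mapred.output.dir} only if the attempt succeeds.
+       Path workDir = FileOutputFormat.getWorkOutputPath(job);
+       Path sideFile = new Path(workDir, "side-data.txt");
+       FileSystem fs = sideFile.getFileSystem(job);
+       if (!fs.exists(sideFile)) {                  // create the side-file once per attempt
+         FSDataOutputStream out = fs.create(sideFile);
+         out.writeBytes("side output\n");
+         out.close();
+       }
+       output.collect(key, val);
+     }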
    +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is used to validate the input directories when a job is + submitted so that the {@link JobClient} can fail early, with an useful + error message, in case of errors. For e.g. input directory does not exist. +

    + + @param job job configuration. + @throws InvalidInputException if the job does not have valid input]]> +
    +
    + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

    Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For e.g. a split could + be <input-file-path, start, offset> tuple. + + @param job job configuration. + @param numSplits the desired number of splits, a hint. + @return an array of {@link InputSplit}s for the job.]]> + + + + + + + + + It is the responsibility of the RecordReader to respect + record boundaries while processing the logical split to present a + record-oriented view to the individual task.

    + + @param split the {@link InputSplit} + @param job the job that this split belongs to + @return a {@link RecordReader}]]> +
    +
    + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
    1. + Validate the input-specification of the job. +
    2. + Split-up the input file(s) into logical {@link InputSplit}s, each of + which is then assigned to an individual {@link Mapper}. +
    3. +
    4. + Provide the {@link RecordReader} implementation to be used to glean + input records from the logical InputSplit for processing by + the {@link Mapper}. +
    5. +
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

    Clearly, logical splits based on input-size is insufficient for many + applications since record boundaries are to respected. In such cases, the + application has to also implement a {@link RecordReader} on whom lies the + responsibilty to respect record-boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see JobClient + @see FileInputFormat]]> + + + + + + + + + + InputSplit. + + @return the number of bytes in the input split. + @throws IOException]]> + + + + + + InputSplit is + located as an array of Strings. + @throws IOException]]> + + + + InputSplit represents the data to be processed by an + individual {@link Mapper}. + +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + jobid doesn't correspond to any known job. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient is the primary interface for the user-job to interact + with the {@link JobTracker}. + + JobClient provides facilities to submit jobs, track their + progress, access component-tasks' reports/logs, get the Map-Reduce cluster + status information etc. + +

    The job submission process involves: +

      +
    1. + Checking the input and output specifications of the job. +
    2. +
    3. + Computing the {@link InputSplit}s for the job. +
    4. +
    5. + Setup the requisite accounting information for the {@link DistributedCache} + of the job, if necessary. +
    6. +
    7. + Copying the job's jar and configuration to the map-reduce system directory + on the distributed file-system. +
    8. +
    9. + Submitting the job to the JobTracker and optionally monitoring + it's status. +
    10. +

    + + Normally the user creates the application, describes various facets of the + job via {@link JobConf} and then uses the JobClient to submit + the job and monitor its progress. + +

    Here is an example on how to use JobClient:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     job.setInputPath(new Path("in"));
    +     job.setOutputPath(new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +
    +     // Submit the job, then poll for progress until the job is complete
    +     JobClient.runJob(job);
    + 

    + +

    Job Control

    + +

    At times clients would chain map-reduce jobs to accomplish complex tasks + which cannot be done via a single map-reduce job. This is fairly easy since + the output of the job, typically, goes to distributed file-system and that + can be used as the input for the next job.

    + +

    However, this also means that the onus on ensuring jobs are complete + (success/failure) lies squarely on the clients. In such situations the + various job-control options are: +

      +
    1. + {@link #runJob(JobConf)} : submits the job and returns only after + the job has completed. +
    2. +
    3. + {@link #submitJob(JobConf)} : only submits the job, then poll the + returned handle to the {@link RunningJob} to query status and make + scheduling decisions. +
    4. +
    5. + {@link JobConf#setJobEndNotificationURI(String)} : setup a notification + on job-completion, thus avoiding polling. +
    6. +

    + + @see JobConf + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
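+ A minimal sketch of the submitJob variant above (submit, then poll the returned
+ handle), assuming a fully configured JobConf named job; the 5 second poll
+ interval is arbitrary:
+     JobClient jc = new JobClient(job);
+     RunningJob running = jc.submitJob(job);
+     while (!running.isComplete()) {
+       try { Thread.sleep(5000); } catch (InterruptedException ie) { break; }
+     }
+     if (!running.isSuccessful()) {
+       throw new IOException("Job failed: " + running.getJobID());
+     }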
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if framework should keep the intermediate files + for failed tasks, false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: +

    + @param dir the {@link Path} of the output directory for the map-reduce job.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the outputs of the maps are to be compressed, + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This comparator should be provided if the equivalence rules for keys + for sorting the intermediates are different from those for grouping keys + before each call to + {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.

    + +

    For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed + in a single call to the reduce function if K1 and K2 compare as equal.

    + +

    Since {@link #setOutputKeyComparatorClass(Class)} can be used to control + how keys are sorted, this can be used in conjunction to simulate + secondary sort on values.

    + +

    Note: This is not a guarantee of the reduce sort being + stable in any sense. (In any case, with the order of available + map-outputs to the reduce being non-deterministic, it wouldn't make + that much sense.)

    + + @param theClass the comparator class to be used for grouping keys. + It should implement RawComparator. + @see #setOutputKeyComparatorClass(Class)]]> +
    +
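+ For illustration, the wiring for such a secondary sort looks like the following;
+ both comparator classes are hypothetical user-supplied RawComparator implementations:
+     job.setOutputKeyComparatorClass(CompositeKeyComparator.class);       // full sort order
+     job.setOutputValueGroupingComparator(NaturalKeyComparator.class);    // grouping for reduce()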
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. Typically the combiner is same as the + the {@link Reducer} for the job i.e. {@link #getReducerClass()}. + + @return the user-defined combiner class used to combine map-outputs.]]> + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. + +

    The combiner is a task-level aggregation operation which, in some cases, + helps to cut down the amount of data transferred from the {@link Mapper} to + the {@link Reducer}, leading to better performance.

    + +

Typically the combiner is the same as the Reducer for the + job, i.e. {@link #setReducerClass(Class)}.

    + + @param theClass the user-defined combiner class used to combine + map-outputs.]]> +
    +
    + + + true. + + @return true if speculative execution be used for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on, else false.]]> + + + + + true. + + @return true if speculative execution be + used for this job for map tasks, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for map tasks, + else false.]]> + + + + + true. + + @return true if speculative execution be used + for reduce tasks for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for reduce tasks, + else false.]]> + + + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + Note: This is only a hint to the framework. The actual + number of spawned map tasks depends on the number of {@link InputSplit}s + generated by the job's {@link InputFormat#getSplits(JobConf, int)}. + + A custom {@link InputFormat} is typically used to accurately control + the number of map tasks for the job.

    + +

    How many maps?

    + +

    The number of maps is usually driven by the total size of the inputs + i.e. total number of blocks of the input files.

    + +

The right level of parallelism for maps seems to be around 10-100 maps + per-node, although it has been set up to 300 or so for very cpu-light map + tasks. Task setup takes a while, so it is best if the maps take at least a + minute to execute.

    + +

    The default behavior of file-based {@link InputFormat}s is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of input files. However, the {@link FileSystem} blocksize of the + input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

    Thus, if you expect 10TB of input data and have a blocksize of 128MB, + you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is + used to set it even higher.

    + + @param n the number of map tasks for this job. + @see InputFormat#getSplits(JobConf, int) + @see FileInputFormat + @see FileSystem#getDefaultBlockSize() + @see FileStatus#getBlockSize()]]> +
    +
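+ (Worked out: 10 TB = 10 x 1024 x 1024 MB = 10,485,760 MB, and 10,485,760 MB / 128 MB
+ per block = 81,920 splits, hence the roughly 82,000 maps quoted above.)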
    + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + How many reduces? + +

    The right number of reduces seems to be 0.95 or + 1.75 multiplied by (<no. of nodes> * + + mapred.tasktracker.reduce.tasks.maximum). +

    + +

With 0.95 all of the reduces can launch immediately and + start transferring map outputs as the maps finish. With 1.75 + the faster nodes will finish their first round of reduces and launch a + second wave of reduces doing a much better job of load balancing.

    + +

    Increasing the number of reduces increases the framework overhead, but + increases load balancing and lowers the cost of failures.

    + +

    The scaling factors above are slightly less than whole numbers to + reserve a few reduce slots in the framework for speculative-tasks, failures + etc.

    + +

    Reducer NONE

    + +

    It is legal to set the number of reduce-tasks to zero.

    + +

In this case the output of the map-tasks goes directly to the distributed + file-system, to the path set by + {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the + framework doesn't sort the map-outputs before writing them out to HDFS.

    + + @param n the number of reduce tasks for this job.]]> +
    +
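+ For example, with purely illustrative numbers: a cluster of 100 nodes whose
+ mapred.tasktracker.reduce.tasks.maximum is 2 gives 0.95 x 100 x 2 = 190 reduces
+ for the single-wave setting, or 1.75 x 100 x 2 = 350 for the two-wave setting, e.g.:
+     job.setNumReduceTasks(190);   // 0.95 * 100 nodes * 2 reduce slots per node (illustrative)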
    + + + mapred.map.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per map task.]]> + + + + + + + + + + + mapred.reduce.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per reduce task.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + noFailures, the + tasktracker is blacklisted for this job. + + @param noFailures maximum no. of failures of a given job per tasktracker.]]> + + + + + blacklisted for this job. + + @return the maximum no. of failures of a given job per tasktracker.]]> + + + + + failed. + + Defaults to zero, i.e. any failed map-task results in + the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + failed. + + Defaults to zero, i.e. any failed reduce-task results + in the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The debug script can aid debugging of failed map tasks. The script is + given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the map failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setMapDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param mDbgScript the script name]]> +
    +
    + + + + + + + + + The debug script can aid debugging of failed reduce tasks. The script + is given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

The debug command, run on the node where the reduce failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

The script file is distributed through {@link DistributedCache} + APIs. The script file needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setReduceDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param rDbgScript the script name]]> +
    +
    + + + + + + + + null if it hasn't + been set. + @see #setJobEndNotificationURI(String)]]> + + + + + + The uri can contain 2 special parameters: $jobId and + $jobStatus. Those, if present, are replaced by the job's + identifier and completion-status respectively.

    + +

    This is typically used by application-writers to implement chaining of + Map-Reduce jobs in an asynchronous manner.

    + + @param uri the job end notification uri + @see JobStatus + @see Job Completion and Chaining]]> +
    +
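+ For illustration (the host, port, path and query-parameter names below are
+ hypothetical; only $jobId and $jobStatus are expanded by the framework):
+     job.setJobEndNotificationURI(
+         "http://myhost:8080/notify?jobid=$jobId&status=$jobStatus");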
    + + + + When a job starts, a shared directory is created at location + + ${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ . + This directory is exposed to the users through + job.local.dir . + So, the tasks can use this space + as scratch space and share files among them.

    + This value is available as System property also. + + @return The localized job specific shared directory]]> +
    +
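+ A task can read this location either from its configuration or from the system
+ property of the same name, e.g.:
+     String jobLocalDir = job.get("job.local.dir");            // via the JobConf
+     String sameValue = System.getProperty("job.local.dir");   // via the system property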
    + + JobConf is the primary interface for a user to describe a + map-reduce job to the Hadoop framework for execution. The framework tries to + faithfully execute the job as-is described by JobConf, however: +
      +
    1. + Some configuration parameters might have been marked as + + final by administrators and hence cannot be altered. +
    2. +
    3. + While some job parameters are straight-forward to set + (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly + rest of the framework and/or job-configuration and is relatively more + complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}). +
    4. +

    + +

    JobConf typically specifies the {@link Mapper}, combiner + (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and + {@link OutputFormat} implementations to be used etc. + +

Optionally JobConf is used to specify other advanced facets + of the job such as Comparators to be used, files to be put in + the {@link DistributedCache}, whether or not intermediate and/or job outputs + are to be compressed (and how), debuggability via user-provided scripts + ( {@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}) + for doing post-processing on task logs, task's stdout, stderr, syslog, etc.

    + +

    Here is an example on how to configure a job via JobConf:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     FileInputFormat.setInputPaths(job, new Path("in"));
    +     FileOutputFormat.setOutputPath(job, new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setCombinerClass(MyJob.MyReducer.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +     
    +     job.setInputFormat(SequenceFileInputFormat.class);
    +     job.setOutputFormat(SequenceFileOutputFormat.class);
    + 

    + + @see JobClient + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -archives + -files inputjar args]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + system-dir/jobName.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + zero. + + @param conf configuration for the JobTracker. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.file.buffer.size specified in the given + Configuration. + @param in input stream + @param conf configuration + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Output pairs need not be of the same types as input pairs. A given + input pair may map to zero or many output pairs. Output pairs are + collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes a significant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the input key. + @param value the input value. + @param output collects mapped keys and values. + @param reporter facility to report progress.]]> +
    +
    + + Maps are the individual tasks which transform input records into a + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link JobConf} for the + job via the {@link JobConfigurable#configure(JobConf)} and initialize + themselves. Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    The framework then calls + {@link #map(Object, Object, OutputCollector, Reporter)} + for each key/value pair in the InputSplit for that task.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the grouping by specifying + a Comparator via + {@link JobConf#setOutputKeyComparatorClass(Class)}.

    + +

    The grouped Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    The intermediate, grouped outputs are always stored in + {@link SequenceFile}s. Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the JobConf.

    + +

    If the job has + zero + reduces then the output of the Mapper is directly written + to the {@link FileSystem} without grouping by keys.

    + +

    Example:

    +

    +     public class MyMapper<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Mapper<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +       
    +       private String mapTaskId;
    +       private String inputFile;
    +       private int noRecords = 0;
    +       
    +       public void configure(JobConf job) {
    +         mapTaskId = job.get("mapred.task.id");
    +         inputFile = job.get("mapred.input.file");
    +       }
    +       
    +       public void map(K key, V val,
    +                       OutputCollector<K, V> output, Reporter reporter)
    +       throws IOException {
    +         // Process the <key, value> pair (assume this takes a while)
    +         // ...
    +         // ...
    +         
    +         // Let the framework know that we are alive, and kicking!
    +         // reporter.progress();
    +         
    +         // Process some more
    +         // ...
    +         // ...
    +         
    +         // Increment the no. of <key, value> pairs processed
    +         ++noRecords;
    +
    +         // Increment counters
+         reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
    +        
    +         // Every 100 records update application-level status
    +         if ((noRecords%100) == 0) {
    +           reporter.setStatus(mapTaskId + " processed " + noRecords + 
    +                              " from input-file: " + inputFile); 
    +         }
    +         
    +         // Output the result
    +         output.collect(key, val);
    +       }
    +     }
    + 

    + +

    Applications may write a custom {@link MapRunnable} to exert greater + control on map processing e.g. multi-threaded Mappers etc.

    + + @see JobConf + @see InputFormat + @see Partitioner + @see Reducer + @see MapReduceBase + @see MapRunnable + @see SequenceFile]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + Provides default no-op implementations for a few methods, most non-trivial + applications need to override some of them.

    ]]> +
    +
    + + + + + + + + + + + <key, value> pairs. + +

    Mapping of input records to output records is complete when this method + returns.

    + + @param input the {@link RecordReader} to read the input records. + @param output the {@link OutputCollector} to collect the outputrecords. + @param reporter {@link Reporter} to report progress, status-updates etc. + @throws IOException]]> +
    +
    + + Custom implementations of MapRunnable can exert greater + control on map processing e.g. multi-threaded, asynchronous mappers etc.

    + + @see Mapper]]> +
    +
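+ A minimal, single-threaded MapRunnable sketch (roughly what the stock MapRunner
+ does), assuming the configured Mapper class matches the K1/V1/K2/V2 types:
+     public class SimpleMapRunner<K1, V1, K2, V2> implements MapRunnable<K1, V1, K2, V2> {
+       private Mapper<K1, V1, K2, V2> mapper;
+
+       @SuppressWarnings("unchecked")
+       public void configure(JobConf job) {
+         this.mapper = (Mapper<K1, V1, K2, V2>)
+             ReflectionUtils.newInstance(job.getMapperClass(), job);
+       }
+
+       public void run(RecordReader<K1, V1> input, OutputCollector<K2, V2> output,
+                       Reporter reporter) throws IOException {
+         try {
+           K1 key = input.createKey();
+           V1 value = input.createValue();
+           // Feed every record of the split to the user's Mapper.
+           while (input.next(key, value)) {
+             mapper.map(key, value, output, reporter);
+           }
+         } finally {
+           mapper.close();
+         }
+       }
+     }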
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nearly + equal content length.
    + Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} + to construct RecordReader's for MultiFileSplit's. + @see MultiFileSplit]]> +
    +
    + + + + + + + + + + + + + + + + + th Path]]> + + + + + + + + + + + th Path]]> + + + + + + + + + + + + + + + + + + + + + + + MultiFileSplit can be used to implement {@link RecordReader}'s, with + reading one record per file. + @see FileSplit + @see MultiFileInputFormat]]> + + + + + + + + + + + + + + + <key, value> pairs output by {@link Mapper}s + and {@link Reducer}s. + +

    OutputCollector is the generalization of the facility + provided by the Map-Reduce framework to collect data output by either the + Mapper or the Reducer i.e. intermediate outputs + or the output of the job.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when it is + a job is submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + @param ignored + @param job job configuration. + @throws IOException when output should not be attempted]]> +
    +
    + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
    1. + Validate the output-specification of the job. For e.g. check that the + output directory doesn't already exist. +
    2. + Provide the {@link RecordWriter} implementation to be used to write out + the output files of the job. Output files are stored in a + {@link FileSystem}. +
    3. +
    + + @see RecordWriter + @see JobConf]]> +
    +
    + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Typically a hash function on a all or a subset of the key.

    + + @param key the key to be paritioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + + @see Reducer]]> +
    +
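+ A minimal hash-based Partitioner sketch (the same idea as the stock HashPartitioner;
+ the Text key/value types are only an example):
+     public class MyHashPartitioner implements Partitioner<Text, Text> {
+       public void configure(JobConf job) { }
+
+       public int getPartition(Text key, Text value, int numPartitions) {
+         // Mask the sign bit so the result is always a valid partition index.
+         return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
+       }
+     }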
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.0 to 1.0. + @throws IOException]]> + + + + RecordReader reads <key, value> pairs from an + {@link InputSplit}. + +

    RecordReader, typically, converts the byte-oriented view of + the input, provided by the InputSplit, and presents a + record-oriented view for the {@link Mapper} & {@link Reducer} tasks for + processing. It thus assumes the responsibility of processing record + boundaries and presenting the tasks with keys and values.

    + + @see InputSplit + @see InputFormat]]> +
    +
    + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param reporter facility to report progress. + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + Reduces values for a given key. + +

    The framework calls this method for each + <key, (list of values)> pair in the grouped inputs. + Output values must be of the same type as input values. Input keys must + not be altered. Typically all values are combined into zero or one value. +

    + +

    Output pairs are collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes a significant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the key. + @param values the list of values to reduce. + @param output to collect keys and combined values. + @param reporter facility to report progress.]]> +
    + + + The number of Reducers for the job is set by the user via + {@link JobConf#setNumReduceTasks(int)}. Reducer implementations + can access the {@link JobConf} for the job via the + {@link JobConfigurable#configure(JobConf)} method and initialize themselves. + Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    Reducer has 3 primary phases:

    +
      +
    1. + +

      Shuffle

      + +

Reducer is input the grouped output of a {@link Mapper}. + In this phase the framework, for each Reducer, fetches the + relevant partition of the output of all the Mappers, via HTTP.

      +
    2. + +
    3. +

      Sort

      + +

      The framework groups Reducer inputs by keys + (since different Mappers may have output the same key) in this + stage.

      + +

      The shuffle and sort phases occur simultaneously i.e. while outputs are + being fetched they are merged.

      + +
      SecondarySort
      + +

If equivalence rules for keys while grouping the intermediates are + different from those for grouping keys before reduction, then one may + specify a Comparator via + {@link JobConf#setOutputValueGroupingComparator(Class)}. Since + {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to + control how intermediate keys are grouped, these can be used in conjunction + to simulate secondary sort on values.

      + + + For example, say that you want to find duplicate web pages and tag them + all with the url of the "best" known example. You would set up the job + like: +
        +
      • Map Input Key: url
      • +
      • Map Input Value: document
      • +
      • Map Output Key: document checksum, url pagerank
      • +
      • Map Output Value: url
      • +
      • Partitioner: by checksum
      • +
      • OutputKeyComparator: by checksum and then decreasing pagerank
      • +
      • OutputValueGroupingComparator: by checksum
      • +
      +
    4. + +
    5. +

      Reduce

      + +

      In this phase the + {@link #reduce(Object, Iterator, OutputCollector, Reporter)} + method is called for each <key, (list of values)> pair in + the grouped inputs.

      +

      The output of the reduce task is typically written to the + {@link FileSystem} via + {@link OutputCollector#collect(Object, Object)}.

      +
    6. +
    + +

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

    +     public class MyReducer<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Reducer<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +        
    +       private String reduceTaskId;
    +       private int noKeys = 0;
    +       
    +       public void configure(JobConf job) {
    +         reduceTaskId = job.get("mapred.task.id");
    +       }
    +       
    +       public void reduce(K key, Iterator<V> values,
    +                          OutputCollector<K, V> output, 
    +                          Reporter reporter)
    +       throws IOException {
    +       
    +         // Process
    +         int noValues = 0;
    +         while (values.hasNext()) {
    +           V value = values.next();
    +           
    +           // Increment the no. of values for this key
    +           ++noValues;
    +           
    +           // Process the <key, value> pair (assume this takes a while)
    +           // ...
    +           // ...
    +           
    +           // Let the framework know that we are alive, and kicking!
    +           if ((noValues%10) == 0) {
    +             reporter.progress();
    +           }
    +         
    +           // Process some more
    +           // ...
    +           // ...
    +           
    +           // Output the <key, value> 
    +           output.collect(key, value);
    +         }
    +         
    +         // Increment the no. of <key, list of values> pairs processed
    +         ++noKeys;
    +         
    +         // Increment counters
+         reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
    +         
    +         // Every 100 keys update application-level status
    +         if ((noKeys%100) == 0) {
    +           reporter.setStatus(reduceTaskId + " processed " + noKeys);
    +         }
    +       }
    +     }
    + 

    + + @see Mapper + @see Partitioner + @see Reporter + @see MapReduceBase]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum. + @param amount A non-negative amount by which the counter is to + be incremented.]]> + + + + + + InputSplit that the map is reading from. + @throws UnsupportedOperationException if called outside a mapper]]> + + + + + + + + + {@link Mapper} and {@link Reducer} can use the Reporter + provided to report progress or just indicate that they are alive. In + scenarios where the application takes an insignificant amount of time to + process individual key/value pairs, this is crucial since the framework + might assume that the task has timed-out and kill that task. + +

    Applications can also update {@link Counters} via the provided + Reporter .

    + + @see Progressable + @see Counters]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RunningJob is the user-interface to query for details on a + running Map-Reduce job. + +

    Clients can get hold of RunningJob via the {@link JobClient} + and then query the running-job for details such as name, configuration, + progress etc.

    + + @see JobClient]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f. The filtering criteria is + MD5(key) % f == 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + f using + the criteria record# % f == 0. + For example, if the frequency is 10, one out of 10 records is returned.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . + @param name The name of the server + @param port The port to use on the server + @param findPort whether the server should start at the given port and + increment by 1 until it finds a free port.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + points to the log directory + "/static/" -> points to common static files (src/webapps/static) + "/" -> the jsp server code from (src/webapps/)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.log.dir.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A software framework for easily writing applications which process vast +amounts of data (multi-terabyte data-sets) parallelly on large clusters +(thousands of nodes) built of commodity hardware in a reliable, fault-tolerant +manner.

    + +

A Map-Reduce job usually splits the input data-set into independent +chunks which are processed by the map tasks in a completely parallel manner, +followed by reduce tasks which aggregate their output. Typically both +the input and the output of the job are stored in a +{@link org.apache.hadoop.fs.FileSystem}. The framework takes care of monitoring +tasks and re-executing failed ones. Since, usually, the compute nodes and the +storage nodes are the same, i.e. Hadoop's Map-Reduce framework and Distributed +FileSystem are running on the same set of nodes, tasks are effectively scheduled +on the nodes where data is already present, resulting in very high aggregate +bandwidth across the cluster.

    + +

    The Map-Reduce framework operates exclusively on <key, value> +pairs i.e. the input to the job is viewed as a set of <key, value> +pairs and the output as another, possibly different, set of +<key, value> pairs. The keys and values have to +be serializable as {@link org.apache.hadoop.io.Writable}s and additionally the +keys have to be {@link org.apache.hadoop.io.WritableComparable}s in +order to facilitate grouping by the framework.

    + +

    Data flow:

    +
    +                                (input)
    +                                <k1, v1>
    +       
    +                                   |
    +                                   V
    +       
    +                                  map
    +       
    +                                   |
    +                                   V
    +
    +                                <k2, v2>
    +       
    +                                   |
    +                                   V
    +       
    +                                combine
    +       
    +                                   |
    +                                   V
    +       
    +                                <k2, v2>
    +       
    +                                   |
    +                                   V
    +       
    +                                 reduce
    +       
    +                                   |
    +                                   V
    +       
    +                                <k3, v3>
    +                                (output)
    +
    + +

    Applications typically implement +{@link org.apache.hadoop.mapred.Mapper#map(Object, Object, OutputCollector, Reporter)} +and +{@link org.apache.hadoop.mapred.Reducer#reduce(Object, Iterator, OutputCollector, Reporter)} +methods. The application-writer also specifies various facets of the job such +as input and output locations, the Partitioner, InputFormat +& OutputFormat implementations to be used etc. as +a {@link org.apache.hadoop.mapred.JobConf}. The client program, +{@link org.apache.hadoop.mapred.JobClient}, then submits the job to the framework +and optionally monitors it.

    + +

The framework spawns one map task per +{@link org.apache.hadoop.mapred.InputSplit} generated by the +{@link org.apache.hadoop.mapred.InputFormat} of the job and calls +{@link org.apache.hadoop.mapred.Mapper#map(Object, Object, OutputCollector, Reporter)} +with each <key, value> pair read by the +{@link org.apache.hadoop.mapred.RecordReader} from the InputSplit for +the task. The intermediate outputs of the maps are then grouped by keys +and optionally aggregated by the combiner. The key space of intermediate +outputs is partitioned by the {@link org.apache.hadoop.mapred.Partitioner}, where +the number of partitions is exactly the number of reduce tasks for the job.

    + +

    The reduce tasks fetch the sorted intermediate outputs of the maps, via http, +merge the <key, value> pairs and call +{@link org.apache.hadoop.mapred.Reducer#reduce(Object, Iterator, OutputCollector, Reporter)} +for each <key, list of values> pair. The output of the reduce tasks' is +stored on the FileSystem by the +{@link org.apache.hadoop.mapred.RecordWriter} provided by the +{@link org.apache.hadoop.mapred.OutputFormat} of the job.

    + +

    Map-Reduce application to perform a distributed grep:

    +
    
    +public class Grep extends Configured implements Tool {
    +
    +  // map: Search for the pattern specified by 'grep.mapper.regex' &
    +  //      'grep.mapper.regex.group'
    +
+  class GrepMapper<K>
    +  extends MapReduceBase  implements Mapper<K, Text, Text, LongWritable> {
    +
    +    private Pattern pattern;
    +    private int group;
    +
    +    public void configure(JobConf job) {
    +      pattern = Pattern.compile(job.get("grep.mapper.regex"));
    +      group = job.getInt("grep.mapper.regex.group", 0);
    +    }
    +
    +    public void map(K key, Text value,
    +                    OutputCollector<Text, LongWritable> output,
    +                    Reporter reporter)
    +    throws IOException {
    +      String text = value.toString();
    +      Matcher matcher = pattern.matcher(text);
    +      while (matcher.find()) {
    +        output.collect(new Text(matcher.group(group)), new LongWritable(1));
    +      }
    +    }
    +  }
    +
    +  // reduce: Count the number of occurrences of the pattern
    +
    +  class GrepReducer<K> extends MapReduceBase
    +  implements Reducer<K, LongWritable, K, LongWritable> {
    +
    +    public void reduce(K key, Iterator<LongWritable> values,
    +                       OutputCollector<K, LongWritable> output,
    +                       Reporter reporter)
    +    throws IOException {
    +
    +      // sum all values for this key
    +      long sum = 0;
    +      while (values.hasNext()) {
    +        sum += values.next().get();
    +      }
    +
    +      // output sum
    +      output.collect(key, new LongWritable(sum));
    +    }
    +  }
    +  
    +  public int run(String[] args) throws Exception {
    +    if (args.length < 3) {
    +      System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    +      ToolRunner.printGenericCommandUsage(System.out);
    +      return -1;
    +    }
    +
    +    JobConf grepJob = new JobConf(getConf(), Grep.class);
    +    
    +    grepJob.setJobName("grep");
    +
    +    grepJob.setInputPath(new Path(args[0]));
+    grepJob.setOutputPath(new Path(args[1]));
    +
    +    grepJob.setMapperClass(GrepMapper.class);
    +    grepJob.setCombinerClass(GrepReducer.class);
    +    grepJob.setReducerClass(GrepReducer.class);
    +
+    grepJob.set("grep.mapper.regex", args[2]);
    +    if (args.length == 4)
+      grepJob.set("grep.mapper.regex.group", args[3]);
    +
    +    grepJob.setOutputFormat(SequenceFileOutputFormat.class);
    +    grepJob.setOutputKeyClass(Text.class);
    +    grepJob.setOutputValueClass(LongWritable.class);
    +
    +    JobClient.runJob(grepJob);
    +
    +    return 0;
    +  }
    +
    +  public static void main(String[] args) throws Exception {
    +    int res = ToolRunner.run(new Configuration(), new Grep(), args);
    +    System.exit(res);
    +  }
    +
    +}
    +
    + +

    Notice how the data-flow of the above grep job is very similar to doing the +same via the unix pipeline:

    + +
    +cat input/*   |   grep   |   sort    |   uniq -c   >   out
    +
    + +
    +      input   |    map   |  shuffle  |   reduce    >   out
    +
    + +

    Hadoop Map-Reduce applications need not be written in +JavaTM only. +Hadoop Streaming is a utility +which allows users to create and run jobs with any executables (e.g. shell +utilities) as the mapper and/or the reducer. +Hadoop Pipes is a +SWIG-compatible C++ API to implement +Map-Reduce applications (non JNITM based).

    + +

    See Google's original +Map/Reduce paper for background information.

    + +

    Java and JNI are trademarks or registered trademarks of +Sun Microsystems, Inc. in the United States and other countries.

    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the Job was added.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Utilities for managing dependent jobs.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([,]*) + func ::= tbl(,"") + class ::= @see java.lang.Class#forName(java.lang.String) + path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) + } + Reads expression from the mapred.join.expr property and + user-supplied join types from mapred.join.define.<ident> + types. Paths supplied to tbl are given as input paths to the + InputFormat class listed. + @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,

    ) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + mapred.join.define.<ident> to a classname. In the expression + mapred.join.expr, the identifier will be assumed to be a + ComposableRecordReader. + mapred.join.keycomparator can be a classname used to compare keys + in the join. + @see JoinRecordReader + @see MultiFilterRecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + capacity children to position + id in the parent reader. + The id of a root CompositeRecordReader is -1 by convention, but relying + on this is not recommended.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + override(S1,S2,S3) will prefer values + from S3 over S2, and values from S2 over S1 for all keys + emitted from all sources.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [,,...,]]]> + + + + + + + out. + TupleWritable format: + {@code + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Given a set of sorted datasets keyed with the same class and yielding equal +partitions, it is possible to effect a join of those datasets prior to the map. +This could save costs in re-partitioning, sorting, shuffling, and writing out +data required in the general case.

    + +

    Interface

    + +

    The attached code offers the following interface to users of these +classes.

    + + + + + + + + + +
    propertyrequiredvalue
    mapred.join.expryesJoin expression to effect over input data
    mapred.join.keycomparatornoWritableComparator class to use for comparing keys
    mapred.join.define.<ident>noClass mapped to identifier in join expression
    + +

    The join expression understands the following grammar:

    + +
    func ::= <ident>([<func>,]*<func>)
    +func ::= tbl(<class>,"<path>");
    +
    +
    + +

    Operations included in this patch are partitioned into one of two types: +join operations emitting tuples and "multi-filter" operations emitting a +single value from (but not necessarily included in) a set of input values. +For a given key, each operation will consider the cross product of all +values for all sources at that node.

    + +

    Identifiers supported by default:

    + + + + + + + +
    identifiertypedescription
    innerJoinFull inner join
    outerJoinFull outer join
    overrideMultiFilterFor a given key, prefer values from the rightmost source
    + +

    A user of this class must set the InputFormat for the job to +CompositeInputFormat and define a join expression accepted by the +preceding grammar. For example, both of the following are acceptable:

    + +
    inner(tbl(org.apache.hadoop.mapred.SequenceFileInputFormat.class,
    +          "hdfs://host:8020/foo/bar"),
    +      tbl(org.apache.hadoop.mapred.SequenceFileInputFormat.class,
    +          "hdfs://host:8020/foo/baz"))
    +
    +outer(override(tbl(org.apache.hadoop.mapred.SequenceFileInputFormat.class,
    +                   "hdfs://host:8020/foo/bar"),
    +               tbl(org.apache.hadoop.mapred.SequenceFileInputFormat.class,
    +                   "hdfs://host:8020/foo/baz")),
+      tbl(org.apache.hadoop.mapred.SequenceFileInputFormat.class,
    +          "hdfs://host:8020/foo/rab"))
    +
    + +

    CompositeInputFormat includes a handful of convenience methods to +aid construction of these verbose statements.

    + +

    As in the second example, joins may be nested. Users may provide a +comparator class in the mapred.join.keycomparator property to specify +the ordering of their keys, or accept the default comparator as returned by +WritableComparator.get(keyclass).

    + +

    Users can specify their own join operations, typically by overriding +JoinRecordReader or MultiFilterRecordReader and mapping that +class to an identifier in the join expression using the +mapred.join.define.ident property, where ident is +the identifier appearing in the join expression. Users may elect to emit- or +modify- values passing through their join operation. Consulting the existing +operations for guidance is recommended. Adding arguments is considerably more +complex (and only partially supported), as one must also add a Node +type to the parse tree. One is probably better off extending +RecordReader in most cases.

    + +JIRA]]> +
    +
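+ For illustration, the first expression above can also be produced with the compose
+ convenience methods instead of being written by hand (paths as in the example; this
+ sketch assumes the old-API classes in org.apache.hadoop.mapred.join):
+     JobConf job = new JobConf();
+     job.setInputFormat(CompositeInputFormat.class);
+     job.set("mapred.join.expr", CompositeInputFormat.compose(
+         "inner", SequenceFileInputFormat.class,
+         new Path("hdfs://host:8020/foo/bar"),
+         new Path("hdfs://host:8020/foo/baz")));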
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used instead of the default implementation, + @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + bound in order to improve throughput. +

    + Map implementations using this MapRunnable must be thread-safe. +

    The Map-Reduce job has to be configured to use this MapRunnable class (using the JobConf.setMapRunnerClass method) and the number of threads the thread-pool can use via the mapred.map.multithreadedrunner.threads property; its default value is 10 threads.
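    A minimal configuration sketch along those lines (a hypothetical helper, not from this patch; the mapper class supplied must be thread-safe):

    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.lib.MultithreadedMapRunner;

    public class MultithreadedSetupSketch {
      // Switches an existing job over to the multithreaded runner with 20 threads.
      public static void useMultithreadedRunner(JobConf conf,
                                                Class<? extends Mapper> mapper) {
        conf.setMapperClass(mapper);                // must be thread-safe
        conf.setMapRunnerClass(MultithreadedMapRunner.class);
        conf.setInt("mapred.map.multithreadedrunner.threads", 20);  // default is 10
      }
    }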

    pairs. Uses {@link StringTokenizer} to break text into tokens.

    Library of generally useful mappers, reducers, and partitioners.

    ]]> +
    +
    public interface ValueAggregatorDescriptor {
        public ArrayList<Entry> generateKeyValPairs(Object key, Object value);
        public void configure(JobConf job);
    }

    The package also provides a base class, ValueAggregatorBaseDescriptor, implementing the above interface. The user can extend the base class and implement generateKeyValPairs accordingly.

    The primary work of generateKeyValPairs is to emit one or more key/value pairs based on the input key/value pair. The key in an output key/value pair encodes two pieces of information: the aggregation type and the aggregation id. The value will be aggregated onto the aggregation id according to the aggregation type.

    This class offers a function to generate a map/reduce job using the Aggregate framework. The function takes the following parameters: input directory spec, input format (text or sequence file), output directory, and a file specifying the user plugin class.

    Aggregate framework

    +

    +Generally speaking, in order to implement an application using Map/Reduce +model, the developer needs to implement Map and Reduce functions (and possibly +Combine function). However, for a lot of applications related to counting and +statistics computing, these functions have very similar +characteristics. This provides a package implementing +those patterns. In particular, the package provides a generic mapper class, +a reducer class and a combiner class, and a set of built-in value aggregators. +It also provides a generic utility class, ValueAggregatorJob, that offers a static function that +creates map/reduce jobs: +

    +
    +public static JobConf createValueAggregatorJob(String args[]) throws IOException;
    +
    +
    +To call this function, the user needs to pass in arguments specifying the input directories, the output directory, +the number of reducers, the input data format (textinputformat or sequencefileinputformat), and a file specifying user plugin class(es) to load by the mapper. +A user plugin class is responsible for specifying what +aggregators to use and what values are for which aggregators. +A plugin class must implement the following interface: +
    +
    + public interface ValueAggregatorDescriptor { 
    +     public ArrayList<Entry> generateKeyValPairs(Object key, Object value); 
    +     public void configure(JobConf job); 
    +} 
    +
    +
    +Function generateKeyValPairs will generate aggregation key/value pairs for the +input key/value pair. Each aggregation key encodes two pieces of information: the aggregation type and aggregation ID. +The value is the value to be aggregated onto the aggregation ID according to the aggregation type. Here +is a simple example user plugin class for counting the words in the input texts: +
    +
    +public class WordCountAggregatorDescriptor extends ValueAggregatorBaseDescriptor { 
    +    public ArrayList<Entry> generateKeyValPairs(Object key, Object val) {
    +        String words [] = val.toString().split(" |\t");
    +        ArrayList<Entry> retv = new ArrayList<Entry>();
    +        for (int i = 0; i < words.length; i++) {
    +            retv.add(generateEntry(LONG_VALUE_SUM, words[i], ONE));
    +        }
    +        return retv;
    +    }
    +    public void configure(JobConf job) {}
    +} 
    +
    +
    In the above code, LONG_VALUE_SUM is a string denoting the aggregation type LongValueSum, which sums over long values. ONE denotes the string "1". The function generateEntry(LONG_VALUE_SUM, words[i], ONE) will interpret the first argument as an aggregation type, the second as an aggregation ID, and the third argument as the value to be aggregated. The output key will look like "LongValueSum:xxxx", where xxxx is the string value of words[i], and the value will be "1". The mapper will call generateKeyValPairs(Object key, Object val) for each input key/value pair to generate the desired aggregation id/value pairs. The downstream combiner/reducer will interpret these pairs as adding one to the aggregator xxxx.

    +Class ValueAggregatorBaseDescriptor is a base class that user plugin classes can extend. Here is the XML fragment specifying the user plugin class: +

    +
    +<property>
    +    <name>aggregator.descriptor.num</name>
    +    <value>1</value>
    +</property>
    +<property>
    +   <name>aggregator.descriptor.0</name>
    +   <value>UserDefined,org.apache.hadoop.mapred.lib.aggregate.examples.WordCountAggregatorDescriptor</value>
    +</property> 
    +
    +
    +Class ValueAggregatorBaseDescriptor itself provides a default implementation for generateKeyValPairs: +
    +
    +public ArrayList<Entry> generateKeyValPairs(Object key, Object val) {
    +   ArrayList<Entry> retv = new ArrayList<Entry>();     
    +   String countType = LONG_VALUE_SUM;
    +   String id = "record_count";
    +   retv.add(generateEntry(countType, id, ONE));
    +   return retv;
    +}
    +
    +
    Thus, if no user plugin class is specified, the default behavior of the map/reduce job is to count the number of records (lines) in the input files.

    +During runtime, the mapper will invoke the generateKeyValPairs function for each input key/value pair, and emit the generated +key/value pairs: +

    +
    +public void map(WritableComparable key, Writable value,
    +            OutputCollector output, Reporter reporter) throws IOException {
    +   Iterator iter = this.aggregatorDescriptorList.iterator();
    +   while (iter.hasNext()) {
    +       ValueAggregatorDescriptor ad = (ValueAggregatorDescriptor) iter.next();
    +       Iterator<Entry> ens = ad.generateKeyValPairs(key, value).iterator();
    +       while (ens.hasNext()) {
    +           Entry en = ens.next();
    +           output.collect((WritableComparable)en.getKey(), (Writable)en.getValue());
    +       }
    +   }
    +}
    +
    +
    +The reducer will create an aggregator object for each key/value list pair, and perform the appropriate aggregation. +At the end, it will emit the aggregator's results: +
    +
    +public void reduce(WritableComparable key, Iterator values,
    +            OutputCollector output, Reporter reporter) throws IOException {
    +   String keyStr = key.toString();
    +   int pos = keyStr.indexOf(ValueAggregatorDescriptor.TYPE_SEPARATOR);
    +   String type = keyStr.substring(0,pos);
    +   keyStr = keyStr.substring(pos+ValueAggregatorDescriptor.TYPE_SEPARATOR.length());       
    +   ValueAggregator aggregator = 
    +       ValueAggregatorBaseDescriptor.generateValueAggregator(type);
    +   while (values.hasNext()) {
    +       aggregator.addNextValue(values.next());
    +   }         
    +   String val = aggregator.getReport();
    +   key = new Text(keyStr);
    +   output.collect(key, new Text(val)); 
    +}
    +
    +
    In order to be able to use a combiner, all of the aggregators used must be associative and commutative. The following are the types supported:
      +
    • LongValueSum: sum over long values +
    • DoubleValueSum: sum over float/double values +
    • uniqValueCount: count the number of distinct values +
    • ValueHistogram: compute the histogram of values, plus the minimum, maximum, median, average, and standard deviation of numeric values
    +

    +

    Create and run an application

    +

    +To create an application, the user needs to do the following things: +

    +1. Implement a user plugin: +

    +
    +import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor;
    +import org.apache.hadoop.mapred.JobConf;
    +
    +public class WordCountAggregatorDescriptor extends ValueAggregatorBaseDescriptor {
    +   public ArrayList<Entry> generateKeyValPairs(Object key, Object val) {
    +       // emit one or more aggregation key/value pairs, as in the example shown earlier
    +       return new ArrayList<Entry>();
    +   }
    +   public void configure(JobConf job) {
    +    
    +   } 
    +}
    +
    +
    + +2. Create an xml file specifying the user plugin. +

    +3. Compile your java class and create a jar file, say wc.jar. + +

    +Finally, run the job: +

    +
    +        hadoop jar wc.jar org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob indirs outdir numofreducers textinputformat|sequencefileinputformat spec_file
    +
    +
    +
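    Equivalently, the same job can be launched from Java using the createValueAggregatorJob function shown earlier; a hedged sketch (class name and argument handling are illustrative):

    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob;

    public class RunAggregateJobSketch {
      public static void main(String[] args) throws Exception {
        // args: indirs outdir numofreducers textinputformat|sequencefileinputformat spec_file
        JobConf conf = ValueAggregatorJob.createValueAggregatorJob(args);
        JobClient.runJob(conf);
      }
    }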

    The class org.apache.hadoop.mapred.pipes.Submitter has a public static method to submit a job as a JobConf and a main method that takes an application and optional configuration file, input directories, and output directory. The CLI for the main method looks like:

    +bin/hadoop pipes \
    +  [-conf path] \
    +  [-input inputDir] \
    +  [-output outputDir] \
    +  [-jar applicationJarFile] \
    +  [-inputformat class] \
    +  [-map class] \
    +  [-partitioner class] \
    +  [-reduce class] \
    +  [-writer class] \
    +  [-program program url]
    +
    + +

    The application programs link against a thin C++ wrapper library that handles the communication with the rest of the Hadoop system. The C++ interface is "swigable" so that interfaces can be generated for Python and other scripting languages. All of the C++ functions and classes are in the HadoopPipes namespace. The job may consist of any combination of Java and C++ RecordReaders, Mappers, Partitioners, Combiners, Reducers, and RecordWriters.

    + +Hadoop Pipes has a generic Java class for handling the mapper and +reducer (PipesMapRunner and PipesReducer). They fork off the +application program and communicate with it over a socket. The +communication is handled by the C++ wrapper library and the +PipesMapRunner and PipesReducer. + +

    + +The application program passes in a factory object that can create +the various objects needed by the framework to the runTask +function. The framework creates the Mapper or Reducer as +appropriate and calls the map or reduce method to invoke the +application's code. The JobConf is available to the application. + +

    + +The Mapper and Reducer objects get all of their inputs, outputs, and +context via context objects. The advantage of using the context +objects is that their interface can be extended with additional +methods without breaking clients. Although this interface is different +from the current Java interface, the plan is to migrate the Java +interface in this direction. + +

    Although the Java implementation is typed, the C++ interface for keys and values is just a byte buffer. Since STL strings provide precisely the right functionality and are standard, they will be used. The decision not to use stronger types was made to simplify the interface.

    + +The application can also define combiner functions. The combiner will +be run locally by the framework in the application process to avoid +the round trip to the Java process and back. Because the compare +function is not available in C++, the combiner will use memcmp to +sort the inputs to the combiner. This is not as general as the Java +equivalent, which uses the user's comparator, but should cover the +majority of the use cases. As the map function outputs key/value +pairs, they will be buffered. When the buffer is full, it will be +sorted and passed to the combiner. The output of the combiner will be +sent to the Java process. + +

    + +The application can also set a partition function to control which key +is given to a particular reduce. If a partition function is not +defined, the Java one will be used. The partition function will be +called by the C++ framework before the key/value pair is sent back to +Java.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When constructing the instance, if the factory property + contextName.class exists, + its value is taken to be the name of the class to instantiate. Otherwise, + the default is to create an instance of + org.apache.hadoop.metrics.spi.NullContext, which is a + dummy "no-op" context which will cause all metric data to be discarded. + + @param contextName the name of the context + @return the named MetricsContext]]> + + + + + + + + + + + + + + When the instance is constructed, this method checks if the file + hadoop-metrics.properties exists on the class path. If it + exists, it must be in the format defined by java.util.Properties, and all + the properties in the file are set as attributes on the newly created + ContextFactory instance. + + @return the singleton ContextFactory instance]]> + + + + getFactory() method.]]> + + + + + + + + + + + + + + + + + + + startMonitoring() again after calling + this. + @see #close()]]> + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A record name identifies the kind of data to be reported. For example, a + program reporting statistics relating to the disks on a computer might use + a record name "diskStats".

    + + A record has zero or more tags. A tag has a name and a value. To + continue the example, the "diskStats" record might use a tag named + "diskName" to identify a particular disk. Sometimes it is useful to have + more than one tag, so there might also be a "diskType" with value "ide" or + "scsi" or whatever.

    + + A record also has zero or more metrics. These are the named + values that are to be reported to the metrics system. In the "diskStats" + example, possible metric names would be "diskPercentFull", "diskPercentBusy", + "kbReadPerSecond", etc.

    + + The general procedure for using a MetricsRecord is to fill in its tag and + metric values, and then call update() to pass the record to the + client library. + Metric data is not immediately sent to the metrics system + each time that update() is called. + An internal table is maintained, identified by the record name. This + table has columns + corresponding to the tag and the metric names, and rows + corresponding to each unique set of tag values. An update + either modifies an existing row in the table, or adds a new row with a set of + tag values that are different from all the other rows. Note that if there + are no tags, then there can be at most one row in the table.

    + + Once a row is added to the table, its data will be sent to the metrics system + on every timer period, whether or not it has been updated since the previous + timer period. If this is inappropriate, for example if metrics were being + reported by some transient object in an application, the remove() + method can be used to remove the row and thus stop the data from being + sent.

    + + Note that the update() method is atomic. This means that it is + safe for different threads to be updating the same metric. More precisely, + it is OK for different threads to call update() on MetricsRecord instances + with the same set of tag names and tag values. Different threads should + not use the same MetricsRecord instance at the same time.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MetricsContext.registerUpdater().]]> + + + + + +The API is abstract so that it can be implemented on top of +a variety of metrics client libraries. The choice of +client library is a configuration option, and different +modules within the same application can use +different metrics implementation libraries. +

    +Sub-packages: +

    +
    org.apache.hadoop.metrics.spi
    +
    The abstract Server Provider Interface package. Those wishing to + integrate the metrics API with a particular metrics client library should + extend this package.
    + +
    org.apache.hadoop.metrics.file
    +
    An implementation package which writes the metric data to + a file, or sends it to the standard output stream.
    + +
    org.apache.hadoop.metrics.ganglia
    +
    An implementation package which sends metric data to + Ganglia.
    +
    + +

    Introduction to the Metrics API

    + +Here is a simple example of how to use this package to report a single +metric value: +
    +    private ContextFactory contextFactory = ContextFactory.getFactory();
    +    
    +    void reportMyMetric(float myMetric) {
    +        MetricsContext myContext = contextFactory.getContext("myContext");
    +        MetricsRecord myRecord = myContext.getRecord("myRecord");
    +        myRecord.setMetric("myMetric", myMetric);
    +        myRecord.update();
    +    }
    +
    + +In this example there are three names: +
    +
    myContext
    +
    The context name will typically identify either the application, or else a + module within an application or library.
    + +
    myRecord
    +
    The record name generally identifies some entity for which a set of + metrics are to be reported. For example, you could have a record named + "cacheStats" for reporting a number of statistics relating to the usage of + some cache in your application.
    + +
    myMetric
    +
    This identifies a particular metric. For example, you might have metrics + named "cache_hits" and "cache_misses". +
    +
    + +

    Tags

    + +In some cases it is useful to have multiple records with the same name. For +example, suppose that you want to report statistics about each disk on a computer. +In this case, the record name would be something like "diskStats", but you also +need to identify the disk which is done by adding a tag to the record. +The code could look something like this: +
    +    private MetricsRecord diskStats =
    +            contextFactory.getContext("myContext").getRecord("diskStats");
    +            
    +    void reportDiskMetrics(String diskName, float diskBusy, float diskUsed) {
    +        diskStats.setTag("diskName", diskName);
    +        diskStats.setMetric("diskBusy", diskBusy);
    +        diskStats.setMetric("diskUsed", diskUsed);
    +        diskStats.update();
    +    }
    +
    + +

    Buffering and Callbacks

    + +Data is not sent immediately to the metrics system when +MetricsRecord.update() is called. Instead it is stored in an +internal table, and the contents of the table are sent periodically. +This can be important for two reasons: +
      +
    1. It means that a programmer is free to put calls to this API in an inner loop, since updates can be very frequent without slowing down the application significantly.
    2. Some implementations can gain efficiency by combining many metrics into a single UDP message.
    + +The API provides a timer-based callback via the +registerUpdater() method. The benefit of this +versus using java.util.Timer is that the callbacks will be done +immediately before sending the data, making the data as current as possible. + +
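    A hedged sketch of the callback style (the context, record, and metric names follow the earlier example; createRecord is assumed here for obtaining the record, and checked exceptions are simply propagated):

    import org.apache.hadoop.metrics.ContextFactory;
    import org.apache.hadoop.metrics.MetricsContext;
    import org.apache.hadoop.metrics.MetricsRecord;
    import org.apache.hadoop.metrics.Updater;

    public class MetricsUpdaterSketch {
      public static void install() throws Exception {
        final MetricsContext context =
            ContextFactory.getFactory().getContext("myContext");
        context.registerUpdater(new Updater() {
          public void doUpdates(MetricsContext unused) {
            // Invoked by the timer immediately before the buffered data is sent.
            MetricsRecord record = context.createRecord("myRecord");
            record.setMetric("myMetric", readCurrentValue());
            record.update();
          }
        });
        context.startMonitoring();
      }

      private static float readCurrentValue() { return 0.0f; }  // placeholder
    }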

    Configuration

    + +It is possible to programmatically examine and modify configuration data +before creating a context, like this: +
    +    ContextFactory factory = ContextFactory.getFactory();
    +    ... examine and/or modify factory attributes ...
    +    MetricsContext context = factory.getContext("myContext");
    +
    The factory attributes can be examined and modified using the following ContextFactory methods:
      +
    • Object getAttribute(String attributeName)
    • String[] getAttributeNames()
    • void setAttribute(String name, Object value)
    • void removeAttribute(attributeName)
    + +
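    For example, a context could be pointed at the file implementation entirely in code, following the contextName.class and contextName.fileName attribute conventions described in this document (a sketch, with exception handling elided):

        ContextFactory factory = ContextFactory.getFactory();
        factory.setAttribute("myContext.class",
                             "org.apache.hadoop.metrics.file.FileContext");
        factory.setAttribute("myContext.fileName", "/tmp/metrics.log");
        MetricsContext context = factory.getContext("myContext");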

    +ContextFactory.getFactory() initializes the factory attributes by +reading the properties file hadoop-metrics.properties if it exists +on the class path. + +

    +A factory attribute named: +

    +contextName.class
    +
    should have as its value the fully qualified name of the class to be instantiated by a call of the ContextFactory method getContext(contextName). If this factory attribute is not specified, the default is to instantiate org.apache.hadoop.metrics.file.FileContext.

    +Other factory attributes are specific to a particular implementation of this +API and are documented elsewhere. For example, configuration attributes for +the file and Ganglia implementations can be found in the javadoc for +their respective packages.]]> + + + + + + + + + + + + + + + + + + + + + + + fileName attribute, + if specified. Otherwise the data will be written to standard + output.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is configured by setting ContextFactory attributes which in turn + are usually configured through a properties file. All the attributes are + prefixed by the contextName. For example, the properties file might contain: +

    + myContextName.fileName=/tmp/metrics.log
    + myContextName.period=5
    + 
    ]]> +
    + + + + +These are the implementation specific factory attributes +(See ContextFactory.getFactory()): + +
    +
    contextName.fileName
    +
    The path of the file to which metrics in context contextName + are to be appended. If this attribute is not specified, the metrics + are written to standard output by default.
    + +
    contextName.period
    +
    The period in seconds on which the metric data is written to the + file.
    + +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + +Implementation of the metrics package that sends metric data to +Ganglia. +Programmers should not normally need to use this package directly. Instead +they should use org.hadoop.metrics. + +

    +These are the implementation specific factory attributes +(See ContextFactory.getFactory()): + +

    +
    contextName.servers
    +
    Space and/or comma separated sequence of servers to which UDP + messages should be sent.
    + +
    contextName.period
    +
    The period in seconds on which the metric data is sent to the + server(s).
    + +
    contextName.units.recordName.metricName
    +
    The units for the specified metric in the specified record.
    + +
    contextName.slope.recordName.metricName
    +
    The slope for the specified metric in the specified record.
    + +
    contextName.tmax.recordName.metricName
    +
    The tmax for the specified metric in the specified record.
    + +
    contextName.dmax.recordName.metricName
    +
    The dmax for the specified metric in the specified record.
    + +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + contextName.tableName. The returned map consists of + those attributes with the contextName and tableName stripped off.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class implements the internal table of metric data, and the timer + on which data is to be sent to the metrics system. Subclasses must + override the abstract emitRecord method in order to transmit + the data.

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + update + and remove().]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @return a list of InetSocketAddress objects.]]> + + + + + + + + + org.apache.hadoop.metrics.file and +org.apache.hadoop.metrics.ganglia.

    + +Plugging in an implementation involves writing a concrete subclass of +AbstractMetricsContext. The subclass should get its + configuration information using the getAttribute(attributeName) + method.]]> + + + + + + + + + + + + + ,name=" + Where the and are the supplied parameters + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.rpc.socket.factory.class.<ClassName>. When no + such parameter exists then fall back on the default socket factory as + configured by hadoop.rpc.socket.factory.class.default. If + this default socket factory is not configured, then fall back on the JVM + default socket factory. + + @param conf the configuration + @param clazz the class (usually a {@link VersionedProtocol}) + @return a socket factory]]> + + + + + + hadoop.rpc.socket.factory.default + + @param conf the configuration + @return the default socket factory as specified in the configuration or + the JVM default socket factory if the configuration does not + contain a default socket factory property.]]> + + + + + + + + + + + + + : + ://:/]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + From documentation for {@link #getInputStream(Socket, long)}:
    + Returns InputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketInputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getInputStream()} is returned. In the later + case, the timeout argument is ignored and the timeout set with + {@link Socket#setSoTimeout(int)} applies for reads.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see #getInputStream(Socket, long) + + @param socket + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + +
    + + From documentation for {@link #getOutputStream(Socket, long)} :
    + Returns OutputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketOutputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getOutputStream()} is returned. In the later + case, the timeout argument is ignored and the write will wait until + data is available.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see #getOutputStream(Socket, long) + + @param socket + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + node + + @param node + a node + @return true if node is already in the tree; false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + scope + if scope starts with ~, choose one from the all nodes except for the + ones in scope; otherwise, choose one from scope + @param scope range of nodes from which a node will be choosen + @return the choosen node]]> + + + + + + + scope but not in excludedNodes + if scope starts with ~, return the number of nodes that are not + in scope and excludedNodes; + @param scope a path string that may start with ~ + @param excludedNodes a list of nodes + @return number of available nodes]]> + + + + + + + + + + + + reader + It linearly scans the array, if a local node is found, swap it with + the first element of the array. + If a local rack node is found, swap it with the first element following + the local node. + If neither local node or local rack node is found, put a random replica + location at postion 0. + It leaves the rest nodes untouched.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout timeout in milliseconds. must not be negative. + @throws IOException]]> +
    +
    + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + +
    + + Create a new ouput stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketOutputStream#SocketOutputStream(WritableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout timeout in milliseconds. must not be negative. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = getCount(). + @param newCapacity The new capacity in bytes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index idx = startVector(...); + while (!idx.done()) { + .... // read element of a vector + idx.incr(); + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Introduction + + Software systems of any significant complexity require mechanisms for data +interchange with the outside world. These interchanges typically involve the +marshaling and unmarshaling of logical units of data to and from data streams +(files, network connections, memory buffers etc.). Applications usually have +some code for serializing and deserializing the data types that they manipulate +embedded in them. The work of serialization has several features that make +automatic code generation for it worthwhile. Given a particular output encoding +(binary, XML, etc.), serialization of primitive types and simple compositions +of primitives (structs, vectors etc.) is a very mechanical task. Manually +written serialization code can be susceptible to bugs especially when records +have a large number of fields or a record definition changes between software +versions. Lastly, it can be very useful for applications written in different +programming languages to be able to share and interchange data. This can be +made a lot easier by describing the data records manipulated by these +applications in a language agnostic manner and using the descriptions to derive +implementations of serialization in multiple target languages. + +This document describes Hadoop Record I/O, a mechanism that is aimed +at +

      +
    • enabling the specification of simple serializable data types (records) +
    • enabling the generation of code in multiple target languages for +marshaling and unmarshaling such types +
    • providing target language specific support that will enable application +programmers to incorporate generated code into their applications +
    + +The goals of Hadoop Record I/O are similar to those of mechanisms such as XDR, +ASN.1, PADS and ICE. While these systems all include a DDL that enables +the specification of most record types, they differ widely in what else they +focus on. The focus in Hadoop Record I/O is on data marshaling and +multi-lingual support. We take a translator-based approach to serialization. +Hadoop users have to describe their data in a simple data description +language. The Hadoop DDL translator rcc generates code that users +can invoke in order to read/write their data from/to simple stream +abstractions. Next we list explicitly some of the goals and non-goals of +Hadoop Record I/O. + + +

    Goals

    + +
      +
    • Support for commonly used primitive types. Hadoop should include as +primitives commonly used builtin types from programming languages we intend to +support. + +
    • Support for common data compositions (including recursive compositions). +Hadoop should support widely used composite types such as structs and +vectors. + +
    • Code generation in multiple target languages. Hadoop should be capable of +generating serialization code in multiple target languages and should be +easily extensible to new target languages. The initial target languages are +C++ and Java. + +
    • Support for generated target languages. Hadoop should include support in the form of headers, libraries, packages for supported target languages that enable easy inclusion and use of generated code in applications.
    • Support for multiple output encodings. Candidates include +packed binary, comma-separated text, XML etc. + +
    • Support for specifying record types in a backwards/forwards compatible +manner. This will probably be in the form of support for optional fields in +records. This version of the document does not include a description of the +planned mechanism, we intend to include it in the next iteration. + +
    + +

    Non-Goals

    + +
      +
    • Serializing existing arbitrary C++ classes. +
    • Serializing complex data structures such as trees, linked lists etc. +
    • Built-in indexing schemes, compression, or check-sums. +
    • Dynamic construction of objects from an XML schema. +
    + +The remainder of this document describes the features of Hadoop record I/O +in more detail. Section 2 describes the data types supported by the system. +Section 3 lays out the DDL syntax with some examples of simple records. +Section 4 describes the process of code generation with rcc. Section 5 +describes target language mappings and support for Hadoop types. We include a +fairly complete description of C++ mappings with intent to include Java and +others in upcoming iterations of this document. The last section talks about +supported output encodings. + + +

    Data Types and Streams

    + +This section describes the primitive and composite types supported by Hadoop. +We aim to support a set of types that can be used to simply and efficiently +express a wide range of record types in different programming languages. + +

    Primitive Types

    + +For the most part, the primitive types of Hadoop map directly to primitive +types in high level programming languages. Special cases are the +ustring (a Unicode string) and buffer types, which we believe +find wide use and which are usually implemented in library code and not +available as language built-ins. Hadoop also supplies these via library code +when a target language built-in is not present and there is no widely +adopted "standard" implementation. The complete list of primitive types is: + +
      +
    • byte: An 8-bit unsigned integer. +
    • boolean: A boolean value. +
    • int: A 32-bit signed integer. +
    • long: A 64-bit signed integer. +
    • float: A single precision floating point number as described by + IEEE-754. +
    • double: A double precision floating point number as described by + IEEE-754. +
    • ustring: A string consisting of Unicode characters. +
    • buffer: An arbitrary sequence of bytes. +
    + + +

    Composite Types

    Hadoop supports a small set of composite types that enable the description of simple aggregate types and containers. A composite type is serialized by sequentially serializing its constituent elements. The supported composite types are:
      + +
    • record: An aggregate type like a C-struct. This is a list of +typed fields that are together considered a single unit of data. A record +is serialized by sequentially serializing its constituent fields. In addition +to serialization a record has comparison operations (equality and less-than) +implemented for it, these are defined as memberwise comparisons. + +
    • vector: A sequence of entries of the same data type, primitive +or composite. + +
    • map: An associative container mapping instances of a key type to +instances of a value type. The key and value types may themselves be primitive +or composite types. + +
    + +

    Streams

    + +Hadoop generates code for serializing and deserializing record types to +abstract streams. For each target language Hadoop defines very simple input +and output stream interfaces. Application writers can usually develop +concrete implementations of these by putting a one method wrapper around +an existing stream implementation. + + +

    DDL Syntax and Examples

    + +We now describe the syntax of the Hadoop data description language. This is +followed by a few examples of DDL usage. + +

    Hadoop DDL Syntax

    + +
    
    +recfile = *include module *record
    +include = "include" path
    +path = (relative-path / absolute-path)
    +module = "module" module-name
    +module-name = name *("." name)
    +record := "class" name "{" 1*(field) "}"
    +field := type name ";"
    +name :=  ALPHA (ALPHA / DIGIT / "_" )*
    +type := (ptype / ctype)
    +ptype := ("byte" / "boolean" / "int" /
    +          "long" / "float" / "double" /
    +          "ustring" / "buffer")
    +ctype := ("vector" "<" type ">") /
    +         ("map" "<" type "," type ">") / name
    +
    + +A DDL file describes one or more record types. It begins with zero or +more include declarations, a single mandatory module declaration +followed by zero or more class declarations. The semantics of each of +these declarations are described below: + +
      + +
    • include: An include declaration specifies a DDL file to be +referenced when generating code for types in the current DDL file. Record types +in the current compilation unit may refer to types in all included files. +File inclusion is recursive. An include does not trigger code +generation for the referenced file. + +
    • module: Every Hadoop DDL file must have a single module +declaration that follows the list of includes and precedes all record +declarations. A module declaration identifies a scope within which +the names of all types in the current file are visible. Module names are +mapped to C++ namespaces, Java packages etc. in generated code. + +
    • class: Records types are specified through class +declarations. A class declaration is like a Java class declaration. +It specifies a named record type and a list of fields that constitute records +of the type. Usage is illustrated in the following examples. + +
    + +

    Examples

    + +
      +
    • A simple DDL file links.jr with just one record declaration. +
      
      +module links {
      +    class Link {
      +        ustring URL;
      +        boolean isRelative;
      +        ustring anchorText;
      +    };
      +}
      +
      + +
    • A DDL file outlinks.jr which includes another +
      
      +include "links.jr"
      +
      +module outlinks {
      +    class OutLinks {
      +        ustring baseURL;
    +        vector<ustring> outLinks;
      +    };
      +}
      +
      +
    + +

    Code Generation

    The Hadoop translator is written in Java. Invocation is done by executing a wrapper shell script named rcc. It takes a list of record description files as a mandatory argument, and an optional target language argument, --language or -l (the default is Java). Thus a typical invocation would look like:
    
    +$ rcc -l C++  ...
    +
    + + +

    Target Language Mappings and Support

    + +For all target languages, the unit of code generation is a record type. +For each record type, Hadoop generates code for serialization and +deserialization, record comparison and access to record members. + +

    C++

    + +Support for including Hadoop generated C++ code in applications comes in the +form of a header file recordio.hh which needs to be included in source +that uses Hadoop types and a library librecordio.a which applications need +to be linked with. The header declares the Hadoop C++ namespace which defines +appropriate types for the various primitives, the basic interfaces for +records and streams and enumerates the supported serialization encodings. +Declarations of these interfaces and a description of their semantics follow: + +
    
    +namespace hadoop {
    +
    +  enum RecFormat { kBinary, kXML, kCSV };
    +
    +  class InStream {
    +  public:
    +    virtual ssize_t read(void *buf, size_t n) = 0;
    +  };
    +
    +  class OutStream {
    +  public:
    +    virtual ssize_t write(const void *buf, size_t n) = 0;
    +  };
    +
    +  class IOError : public runtime_error {
    +  public:
    +    explicit IOError(const std::string& msg);
    +  };
    +
    +  class IArchive;
    +  class OArchive;
    +
    +  class RecordReader {
    +  public:
    +    RecordReader(InStream& in, RecFormat fmt);
    +    virtual ~RecordReader(void);
    +
    +    virtual void read(Record& rec);
    +  };
    +
    +  class RecordWriter {
    +  public:
    +    RecordWriter(OutStream& out, RecFormat fmt);
    +    virtual ~RecordWriter(void);
    +
    +    virtual void write(Record& rec);
    +  };
    +
    +
    +  class Record {
    +  public:
    +    virtual std::string type(void) const = 0;
    +    virtual std::string signature(void) const = 0;
    +  protected:
    +    virtual bool validate(void) const = 0;
    +
    +    virtual void
    +    serialize(OArchive& oa, const std::string& tag) const = 0;
    +
    +    virtual void
    +    deserialize(IArchive& ia, const std::string& tag) = 0;
    +  };
    +}
    +
    + +
      + +
    • RecFormat: An enumeration of the serialization encodings supported +by this implementation of Hadoop. + +
    • InStream: A simple abstraction for an input stream. This has a +single public read method that reads n bytes from the stream into +the buffer buf. Has the same semantics as a blocking read system +call. Returns the number of bytes read or -1 if an error occurs. + +
    • OutStream: A simple abstraction for an output stream. This has a +single write method that writes n bytes to the stream from the +buffer buf. Has the same semantics as a blocking write system +call. Returns the number of bytes written or -1 if an error occurs. + +
    • RecordReader: A RecordReader reads records one at a time from +an underlying stream in a specified record format. The reader is instantiated +with a stream and a serialization format. It has a read method that +takes an instance of a record and deserializes the record from the stream. + +
    • RecordWriter: A RecordWriter writes records one at a +time to an underlying stream in a specified record format. The writer is +instantiated with a stream and a serialization format. It has a +write method that takes an instance of a record and serializes the +record to the stream. + +
    • Record: The base class for all generated record types. This has two +public methods type and signature that return the typename and the +type signature of the record. + +
    + +Two files are generated for each record file (note: not for each record). If a +record file is named "name.jr", the generated files are +"name.jr.cc" and "name.jr.hh" containing serialization +implementations and record type declarations respectively. + +For each record in the DDL file, the generated header file will contain a +class definition corresponding to the record type, method definitions for the +generated type will be present in the '.cc' file. The generated class will +inherit from the abstract class hadoop::Record. The DDL files +module declaration determines the namespace the record belongs to. +Each '.' delimited token in the module declaration results in the +creation of a namespace. For instance, the declaration module docs.links +results in the creation of a docs namespace and a nested +docs::links namespace. In the preceding examples, the Link class +is placed in the links namespace. The header file corresponding to +the links.jr file will contain: + +
    
    +namespace links {
    +  class Link : public hadoop::Record {
    +    // ....
    +  };
    +};
    +
    Each field within the record will cause the generation of a private member declaration of the appropriate type in the class declaration, and one or more accessor methods. The generated class will implement the serialize and deserialize methods defined in hadoop::Record. It will also implement the inspection methods type and signature from hadoop::Record. A default constructor and virtual destructor will also be generated. Serialization code will read/write records into streams that implement the hadoop::InStream and the hadoop::OutStream interfaces.

    For each member of a record an accessor method is generated that returns either the member or a reference to the member. For members that are returned by value, a setter method is also generated. This is true for primitive data members of the types byte, int, long, boolean, float and double. For example, for an int field called MyField the following code is generated.
    
    +...
    +private:
    +  int32_t mMyField;
    +  ...
    +public:
    +  int32_t getMyField(void) const {
    +    return mMyField;
    +  };
    +
    +  void setMyField(int32_t m) {
    +    mMyField = m;
    +  };
    +  ...
    +
    For a ustring, buffer, or composite field, the generated code only contains accessors that return a reference to the field. A const and a non-const accessor are generated. For example:
    
    +...
    +private:
    +  std::string mMyBuf;
    +  ...
    +public:
    +
    +  std::string& getMyBuf() {
    +    return mMyBuf;
    +  };
    +
    +  const std::string& getMyBuf() const {
    +    return mMyBuf;
    +  };
    +  ...
    +
    + +

    Examples

    + +Suppose the inclrec.jr file contains: +
    
    +module inclrec {
    +    class RI {
    +        int      I32;
    +        double   D;
    +        ustring  S;
    +    };
    +}
    +
    + +and the testrec.jr file contains: + +
    
    +include "inclrec.jr"
    +module testrec {
    +    class R {
    +        vector<float>  VF;
    +        RI            Rec;
    +        buffer        Buf;
    +    };
    +}
    +
    + +Then the invocation of rcc such as: +
    
    +$ rcc -l c++ inclrec.jr testrec.jr
    +
    +will result in generation of four files: +inclrec.jr.{cc,hh} and testrec.jr.{cc,hh}. + +The inclrec.jr.hh will contain: + +
    
    +#ifndef _INCLREC_JR_HH_
    +#define _INCLREC_JR_HH_
    +
    +#include "recordio.hh"
    +
    +namespace inclrec {
    +  
    +  class RI : public hadoop::Record {
    +
    +  private:
    +
    +    int32_t      I32;
    +    double       D;
    +    std::string  S;
    +
    +  public:
    +
    +    RI(void);
    +    virtual ~RI(void);
    +
    +    virtual bool operator==(const RI& peer) const;
    +    virtual bool operator<(const RI& peer) const;
    +
    +    virtual int32_t getI32(void) const { return I32; }
    +    virtual void setI32(int32_t v) { I32 = v; }
    +
    +    virtual double getD(void) const { return D; }
    +    virtual void setD(double v) { D = v; }
    +
    +    virtual std::string& getS(void) { return S; }
    +    virtual const std::string& getS(void) const { return S; }
    +
    +    virtual std::string type(void) const;
    +    virtual std::string signature(void) const;
    +
    +  protected:
    +
    +    virtual void serialize(hadoop::OArchive& a) const;
    +    virtual void deserialize(hadoop::IArchive& a);
    +  };
    +} // end namespace inclrec
    +
    +#endif /* _INCLREC_JR_HH_ */
    +
    +
    + +The testrec.jr.hh file will contain: + + +
    
    +
    +#ifndef _TESTREC_JR_HH_
    +#define _TESTREC_JR_HH_
    +
    +#include "inclrec.jr.hh"
    +
    +namespace testrec {
    +  class R : public hadoop::Record {
    +
    +  private:
    +
    +    std::vector<float> VF;
    +    inclrec::RI        Rec;
    +    std::string        Buf;
    +
    +  public:
    +
    +    R(void);
    +    virtual ~R(void);
    +
    +    virtual bool operator==(const R& peer) const;
    +    virtual bool operator<(const R& peer) const;
    +
    +    virtual std::vector<float>& getVF(void);
    +    virtual const std::vector<float>& getVF(void) const;
    +
    +    virtual std::string& getBuf(void);
    +    virtual const std::string& getBuf(void) const;
    +
    +    virtual inclrec::RI& getRec(void);
    +    virtual const inclrec::RI& getRec(void) const;
    +    
    +    virtual bool serialize(hadoop::OutArchive& a) const;
    +    virtual bool deserialize(hadoop::InArchive& a);
    +    
    +    virtual std::string type(void) const;
    +    virtual std::string signature(void) const;
    +  };
    +}; // end namespace testrec
    +#endif /* _TESTREC_JR_HH_ */
    +
    +
    + +

    Java

    + +Code generation for Java is similar to that for C++. A Java class is generated +for each record type with private members corresponding to the fields. Getters +and setters for fields are also generated. Some differences arise in the +way comparison is expressed and in the mapping of modules to packages and +classes to files. For equality testing, an equals method is generated +for each record type. As per Java requirements a hashCode method is also +generated. For comparison a compareTo method is generated for each +record type. This has the semantics as defined by the Java Comparable +interface, that is, the method returns a negative integer, zero, or a positive +integer as the invoked object is less than, equal to, or greater than the +comparison parameter. + +A .java file is generated per record type as opposed to per DDL +file as in C++. The module declaration translates to a Java +package declaration. The module name maps to an identical Java package +name. In addition to this mapping, the DDL compiler creates the appropriate +directory hierarchy for the package and places the generated .java +files in the correct directories. + +
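    For illustration, a hedged sketch of roughly the shape of the class rcc would generate for the Link record from the links.jr example above (the real generated class extends the Hadoop record base class and adds the serialization methods, which are omitted here; member and accessor names are illustrative):

    package links;  // from the "module links" declaration in links.jr

    public class Link implements Comparable<Link> {
      // ustring maps to java.lang.String per the mapping summary below
      private String URL;
      private boolean isRelative;
      private String anchorText;

      public String getURL() { return URL; }
      public void setURL(String v) { URL = v; }
      public boolean getIsRelative() { return isRelative; }
      public void setIsRelative(boolean v) { isRelative = v; }
      public String getAnchorText() { return anchorText; }
      public void setAnchorText(String v) { anchorText = v; }

      // Equality and ordering are memberwise, mirroring the C++ operators;
      // fields are assumed non-null in this sketch.
      public boolean equals(Object o) {
        if (!(o instanceof Link)) return false;
        Link p = (Link) o;
        return URL.equals(p.URL) && isRelative == p.isRelative
            && anchorText.equals(p.anchorText);
      }

      public int hashCode() {
        int h = URL.hashCode();
        h = 37 * h + (isRelative ? 1 : 0);
        return 37 * h + anchorText.hashCode();
      }

      public int compareTo(Link p) {
        int c = URL.compareTo(p.URL);
        if (c != 0) return c;
        if (isRelative != p.isRelative) return isRelative ? 1 : -1;
        return anchorText.compareTo(p.anchorText);
      }
    }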

    Mapping Summary

    + +
    
    +DDL Type        C++ Type            Java Type 
    +
    +boolean         bool                boolean
    +byte            int8_t              byte
    +int             int32_t             int
    +long            int64_t             long
    +float           float               float
    +double          double              double
    +ustring         std::string         java.lang.String
    +buffer          std::string         org.apache.hadoop.record.Buffer
    +class type      class type          class type
    +vector<type>     std::vector<type>    java.util.ArrayList
    +map<key,value>   std::map<key,value>  java.util.TreeMap
    +
    + +

    Data encodings

    + +This section describes the format of the data encodings supported by Hadoop. +Currently, three data encodings are supported, namely binary, CSV and XML. + +

    Binary Serialization Format

    + +The binary data encoding format is fairly dense. Serialization of composite +types is simply defined as a concatenation of serializations of the constituent +elements (lengths are included in vectors and maps). + +Composite types are serialized as follows: +
      +
    • class: Sequence of serialized members. +
    • vector: The number of elements serialized as an int. Followed by a +sequence of serialized elements. +
    • map: The number of key value pairs serialized as an int. Followed +by a sequence of serialized (key,value) pairs. +
    + +Serialization of primitives is more interesting, with a zero compression +optimization for integral types and normalization to UTF-8 for strings. +Primitive types are serialized as follows: + +
      +
    • byte: Represented by 1 byte, as is. +
    • boolean: Represented by 1-byte (0 or 1) +
• int/long: Integers and longs are serialized zero compressed. Represented as 1 byte if -120 <= value < 128. Otherwise, serialized as a sequence of 2-5 bytes for ints, 2-9 bytes for longs. The first byte encodes the number of trailing bytes, N, as the negative number (-120-N). For example, the number 1024 (0x400) is represented by the byte sequence 'x86 x04 x00'. This doesn't help much for 4-byte integers but does a reasonably good job with longs without bit twiddling. (A sketch of this encoding follows the list.)
    • float/double: Serialized in IEEE 754 single and double precision +format in network byte order. This is the format used by Java. +
    • ustring: Serialized as 4-byte zero compressed length followed by +data encoded as UTF-8. Strings are normalized to UTF-8 regardless of native +language representation. +
    • buffer: Serialized as a 4-byte zero compressed length followed by the +raw bytes in the buffer. +
    + + +
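+A minimal Java sketch of the zero-compression rule described in the int/long
+item above (illustrative only: the class and method names are invented for
+the example, and handling of large negative values, which the text does not
+spell out, is omitted):
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+public class ZeroCompressedIntSketch {
+
+  // One byte for -120 <= v < 128; otherwise a marker byte -(120+N)
+  // followed by the N significant bytes, most significant first.
+  public static void writeInt(DataOutputStream out, int value) throws IOException {
+    if (value >= -120 && value < 128) {
+      out.writeByte(value);                      // small value fits in one byte
+      return;
+    }
+    if (value < 0) {
+      throw new IllegalArgumentException("negative encoding not covered here");
+    }
+    int len = 4;                                 // number of significant bytes
+    while (len > 1 && ((value >>> ((len - 1) * 8)) & 0xff) == 0) {
+      len--;
+    }
+    out.writeByte(-(120 + len));                 // e.g. -122 (0x86) when two bytes follow
+    for (int i = len - 1; i >= 0; i--) {
+      out.writeByte((value >>> (i * 8)) & 0xff); // big-endian payload
+    }
+  }
+
+  public static void main(String[] args) throws IOException {
+    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+    writeInt(new DataOutputStream(bytes), 1024);
+    for (byte b : bytes.toByteArray()) {
+      System.out.printf("x%02x ", b & 0xff);     // prints: x86 x04 x00
+    }
+  }
+}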

    CSV Serialization Format

    + +The CSV serialization format has a lot more structure than the "standard" +Excel CSV format, but we believe the additional structure is useful because + +
      +
    • it makes parsing a lot easier without detracting too much from legibility +
    • the delimiters around composites make it obvious when one is reading a +sequence of Hadoop records +
+Serialization formats for the various types are detailed in the grammar that
+follows. The notable feature of the formats is the use of delimiters to
+indicate certain field types.
      +
    • A string field begins with a single quote ('). +
    • A buffer field begins with a sharp (#). +
    • A class, vector or map begins with 's{', 'v{' or 'm{' respectively and +ends with '}'. +
    + +The CSV format can be described by the following grammar: + +
    
    +record = primitive / struct / vector / map
    +primitive = boolean / int / long / float / double / ustring / buffer
    +
    +boolean = "T" / "F"
    +int = ["-"] 1*DIGIT
    +long = ";" ["-"] 1*DIGIT
    +float = ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
    +double = ";" ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
    +
    +ustring = "'" *(UTF8 char except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
    +
    +buffer = "#" *(BYTE except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
    +
    +struct = "s{" record *("," record) "}"
    +vector = "v{" [record *("," record)] "}"
    +map = "m{" [*(record "," record)] "}"
    +
    + +
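+For instance (an illustrative value constructed from the grammar above, not an
+example taken from the original document), a record consisting of a boolean, a
+long and a ustring containing a comma would serialize as:
+
+s{T,;1024,'hello%2c world}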

    XML Serialization Format

+The XML serialization format is the same as that used by Apache XML-RPC
+(http://ws.apache.org/xmlrpc/types.html), which extends the original XML-RPC
+format with some additional data types. Not all record I/O types are directly
+expressible in this format, and access to a DDL is required in order to
+convert them to valid types. All types, primitive or composite, are
+represented by <value> elements. The particular XML-RPC type is indicated by
+a nested element in the <value> element. The encoding for records is always
+UTF-8. Primitive types are serialized as follows:
      +
    • byte: XML tag <ex:i1>. Values: 1-byte unsigned +integers represented in US-ASCII +
    • boolean: XML tag <boolean>. Values: "0" or "1" +
    • int: XML tags <i4> or <int>. Values: 4-byte +signed integers represented in US-ASCII. +
    • long: XML tag <ex:i8>. Values: 8-byte signed integers +represented in US-ASCII. +
    • float: XML tag <ex:float>. Values: Single precision +floating point numbers represented in US-ASCII. +
    • double: XML tag <double>. Values: Double precision +floating point numbers represented in US-ASCII. +
• ustring: XML tag <string>. Values: String values represented as UTF-8. XML does not permit all Unicode characters in literal data. In particular, NULLs and control chars are not allowed. Additionally, XML processors are required to replace carriage returns with line feeds and to replace CRLF sequences with line feeds. Programming languages that we work with do not impose these restrictions on string types. To work around these restrictions, disallowed characters and CRs are percent escaped in strings. The '%' character is also percent escaped.
• buffer: XML tag <string>. Values: Arbitrary binary data. Represented as hexBinary, each byte is replaced by its 2-byte hexadecimal representation.
    + +Composite types are serialized as follows: + +
      +
    • class: XML tag <struct>. A struct is a sequence of +<member> elements. Each <member> element has a <name> +element and a <value> element. The <name> is a string that must +match /[a-zA-Z][a-zA-Z0-9_]*/. The value of the member is represented +by a <value> element. + +
• vector: XML tag <array>. An <array> contains a single <data> element. The <data> element is a sequence of <value> elements each of which represents an element of the vector.
    • map: XML tag <array>. Same as vector. + +
    + +For example: + +
    
    +class {
    +  int           MY_INT;            // value 5
+  vector<float> MY_VEC;     // values 0.1, -0.89, 2.45e4
    +  buffer        MY_BUF;            // value '\00\n\tabc%'
    +}
    +
    + +is serialized as + +
    
    +<value>
    +  <struct>
    +    <member>
    +      <name>MY_INT</name>
    +      <value><i4>5</i4></value>
    +    </member>
    +    <member>
    +      <name>MY_VEC</name>
    +      <value>
    +        <array>
    +          <data>
    +            <value><ex:float>0.1</ex:float></value>
    +            <value><ex:float>-0.89</ex:float></value>
    +            <value><ex:float>2.45e4</ex:float></value>
    +          </data>
    +        </array>
    +      </value>
    +    </member>
    +    <member>
    +      <name>MY_BUF</name>
    +      <value><string>%00\n\tabc%25</string></value>
    +    </member>
    +  </struct>
    +</value> 
    +
    ]]> +
    +
+ This task takes the given record definition files and compiles them into
+ Java or C++ files. It is then up to the user to compile the generated files.

    The task requires the file or the nested fileset element to be + specified. Optional attributes are language (set the output + language, default is "java"), + destdir (name of the destination directory for generated java/c++ + code, default is ".") and failonerror (specifies error handling + behavior. default is true). +

    Usage

    +
    + <recordcc
    +       destdir="${basedir}/gensrc"
    +       language="java">
+   <fileset include="**/*.jr" />
    + </recordcc>
    + 
    ]]> +
    +
    + +
    + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi as a comma separated string in + conf as a property attr + + The String starts with the user name followed by the default group names, + and other group names. + + @param conf configuration + @param attr property name + @param ugi a UnixUserGroupInformation]]> + + + + + + + + conf + + The object is expected to store with the property name attr + as a comma separated string that starts + with the user name followed by group names. + If the property name is not defined, return null. + It's assumed that there is only one UGI per user. If this user already + has a UGI in the ugi map, return the ugi in the map. + Otherwise, construct a UGI from the configuration, store it in the + ugi map and return it. + + @param conf configuration + @param attr property name + @return a UnixUGI + @throws LoginException if the stored string is ill-formatted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This tool supports archiving and anaylzing (sort/grep) of log-files. + It takes as input + a) Input uri which will serve uris of the logs to be archived. + b) Output directory (not mandatory). + b) Directory on dfs to archive the logs. + c) The sort/grep patterns for analyzing the files and separator for boundaries. + Usage: + Logalyzer -archive -archiveDir -analysis -logs -grep -sort -separator +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GenericOptionsParser to parse only the generic Hadoop + arguments. + + The array of string arguments other than the generic arguments can be + obtained by {@link #getRemainingArgs()}. + + @param conf the Configuration to modify. + @param args command-line arguments.]]> + + + + + GenericOptionsParser to parse given options as well + as generic Hadoop options. + + The resulting CommandLine object can be obtained by + {@link #getCommandLine()}. + + @param conf the configuration to modify + @param options options built by the caller + @param args User-specified arguments]]> + + + + + Strings containing the un-parsed arguments.]]> + + + + + CommandLine object + to process the parsed arguments. + + Note: If the object is created with + {@link #GenericOptionsParser(Configuration, String[])}, then returned + object will only contain parsed generic options. + + @return CommandLine representing list of arguments + parsed against Options descriptor.]]> + + + + + + + + + + GenericOptionsParser is a utility to parse command line + arguments generic to the Hadoop framework. + + GenericOptionsParser recognizes several standarad command + line arguments, enabling applications to easily specify a namenode, a + jobtracker, additional configuration resources etc. + +

    Generic Options

    + +

    The supported generic options are:

    +

    +     -conf <configuration file>     specify a configuration file
    +     -D <property=value>            use value for given property
    +     -fs <local|namenode:port>      specify a namenode
    +     -jt <local|jobtracker:port>    specify a job tracker
    + 

    + +

    The general command line syntax is:

    +

    + bin/hadoop command [genericOptions] [commandOptions]
    + 

    + +

Generic command line arguments may modify the Configuration objects given to constructors.

    + +

    The functionality is implemented using Commons CLI.

    + +

    Examples:

    +

    + $ bin/hadoop dfs -fs darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    + 
    + $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    +     
    + $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
    + list /data directory in dfs with conf specified in hadoop-site.xml
    +     
    + $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt local -submit job.xml
    + submit a job to local runner
    + 
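+ A minimal usage sketch (illustrative only; it relies on the
+ (Configuration, String[]) constructor and getRemainingArgs() described
+ above, and the class name ParseDemo is invented for this example):
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.util.GenericOptionsParser;
+
+ public class ParseDemo {
+   public static void main(String[] args) throws Exception {
+     Configuration conf = new Configuration();
+     // Generic options such as -fs, -jt, -conf and -D are applied to conf here.
+     GenericOptionsParser parser = new GenericOptionsParser(conf, args);
+     // Whatever was not a generic option is left for the application.
+     String[] appArgs = parser.getRemainingArgs();
+     System.out.println("application arguments: " + appArgs.length);
+   }
+ }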

    + + @see Tool + @see ToolRunner]]> +
    +
    + + + + + + + + + Class<T>) of the + argument of type T. + @param The type of the argument + @param t the object to get it class + @return Class<T>]]> + + + + + + + List<T> to a an array of + T[]. + @param c the Class object of the items in the list + @param list the list to convert]]> + + + + + + List<T> to a an array of + T[]. + @param list the list to convert + @throws ArrayIndexOutOfBoundsException if the list is empty. + Use {@link #toArray(Class, List)} if the list may be empty.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-hadoop is loaded, + else false]]> + + + + + + true if native hadoop libraries, if present, can be + used for this job; false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { pq.top().change(); pq.adjustTop(); } + instead of
    +  { o = pq.pop(); o.change(); pq.push(o); }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + +
+ Clients and/or applications can use the provided Progressable
+ to explicitly report progress to the Hadoop framework. This is especially
+ important for operations which take a significant amount of time, since,
+ in lieu of the reported progress, the framework has to assume that an error
+ has occurred and time out the operation.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop Pipes + or Hadoop Streaming. + + It also checks to ensure that we are running on a *nix platform else + (e.g. in Cygwin/Windows) it returns null. + @param job job configuration + @return a String[] with the ulimit command arguments or + null if we are running on a non *nix platform or + if the limit is unspecified.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell can be used to run unix commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShellCommandExecutorshould be used in cases where the output + of the command needs no explicit parsing and where the command, working + directory and the environment remains unchanged. The output of the command + is stored as-is and is expected to be small.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the char to be escaped + @return an escaped string]]> + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the escaped char + @return an unescaped string]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyApp.MyMapper.class);
    +         job.setReducerClass(MyApp.MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
+         JobClient.runJob(job);
+
+         return 0;
+       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
+         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> +
    +
    + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + diff --git a/aarch64/share/hadoop/common/jdiff/hadoop_0.18.1.xml b/aarch64/share/hadoop/common/jdiff/hadoop_0.18.1.xml new file mode 100644 index 0000000..fd844cb --- /dev/null +++ b/aarch64/share/hadoop/common/jdiff/hadoop_0.18.1.xml @@ -0,0 +1,44778 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + name property, null if + no such property exists. + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + name property, without doing + variable expansion. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + + value of the name property. + + @param name property name. + @param value property value.]]> + + + + + + + name property. If no such property + exists, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, or if the specified value is not a valid + int, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as an int, + or defaultValue.]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property is specified, or if the specified value is not a valid + long, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property is specified, or if the specified value is not a valid + float, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a float, + or defaultValue.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

    + This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Hadoop by default specifies two resources, loaded in-order from the + classpath:

      +
1. hadoop-default.xml: Read-only defaults for hadoop.
2. hadoop-site.xml: Site-specific configuration for a given hadoop installation.
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    +  <property>
    +    <name>dfs.client.buffer.dir</name>
    +    <value>/tmp/hadoop/dfs/client</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + hadoop-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
1. Other properties defined in this Configuration; and, if a name is undefined here,
2. Properties in {@link System#getProperties()}.
    + +

    For example, if a configuration resource contains the following property + definitions: +

    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    + + When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name.]]> +
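+ A minimal sketch of how the expansion above plays out (illustrative only;
+ the class name ConfDemo is invented, and the two properties are set in code
+ here purely so the example is self-contained):
+
+ import org.apache.hadoop.conf.Configuration;
+
+ public class ConfDemo {
+   public static void main(String[] args) {
+     // Loads the default resources (hadoop-default.xml, then hadoop-site.xml).
+     Configuration conf = new Configuration();
+     conf.set("basedir", "/user/${user.name}");
+     conf.set("tempdir", "${basedir}/tmp");
+     // ${basedir} resolves to the property set above; ${user.name} falls back
+     // to the System property of that name.
+     System.out.println(conf.get("tempdir"));
+   }
+ }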
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The balancer is a tool that balances disk space usage on an HDFS cluster + when some datanodes become full or when new empty nodes join the cluster. + The tool is deployed as an application program that can be run by the + cluster administrator on a live HDFS cluster while applications + adding and deleting files. + +

    SYNOPSIS +

+ To start:
+      bin/start-balancer.sh [-threshold <threshold>]
+      Example: bin/start-balancer.sh
+                     start the balancer with a default threshold of 10%
+               bin/start-balancer.sh -threshold 5
+                     start the balancer with a threshold of 5%
+ To stop:
+      bin/stop-balancer.sh
    + 
    + +

    DESCRIPTION +

The threshold parameter is a fraction in the range of (0%, 100%) with a
default value of 10%. The threshold sets a target for whether the cluster
is balanced. A cluster is balanced if, for each datanode, the utilization
of the node (ratio of used space at the node to total capacity of the node)
differs from the utilization of the cluster (ratio of used space in the
cluster to total capacity of the cluster) by no more than the threshold
value. The smaller the threshold, the more balanced a cluster will become.
It takes more time to run the balancer for small threshold values. Also,
for a very small threshold the cluster may not be able to reach the
balanced state when applications write and delete files concurrently.

    The tool moves blocks from highly utilized datanodes to poorly + utilized datanodes iteratively. In each iteration a datanode moves or + receives no more than the lesser of 10G bytes or the threshold fraction + of its capacity. Each iteration runs no more than 20 minutes. + At the end of each iteration, the balancer obtains updated datanodes + information from the namenode. + +

    A system property that limits the balancer's use of bandwidth is + defined in the default configuration file: +

    + 
    +   dfs.balance.bandwidthPerSec
    +   1048576
    +   Specifies the maximum bandwidth that each datanode 
    + can utilize for the balancing purpose in term of the number of bytes 
    + per second. 
    + 
    + 
    + +

    This property determines the maximum speed at which a block will be + moved from one datanode to another. The default value is 1MB/s. The higher + the bandwidth, the faster a cluster can reach the balanced state, + but with greater competition with application processes. If an + administrator changes the value of this property in the configuration + file, the change is observed when HDFS is next restarted. + +

MONITORING BALANCER PROGRESS

    After the balancer is started, an output file name where the balancer + progress will be recorded is printed on the screen. The administrator + can monitor the running of the balancer by reading the output file. + The output shows the balancer's status iteration by iteration. In each + iteration it prints the starting time, the iteration number, the total + number of bytes that have been moved in the previous iterations, + the total number of bytes that are left to move in order for the cluster + to be balanced, and the number of bytes that are being moved in this + iteration. Normally "Bytes Already Moved" is increasing while "Bytes Left + To Move" is decreasing. + +

    Running multiple instances of the balancer in an HDFS cluster is + prohibited by the tool. + +

    The balancer automatically exits when any of the following five + conditions is satisfied: +

      +
    1. The cluster is balanced; +
    2. No block can be moved; +
    3. No block has been moved for five consecutive iterations; +
    4. An IOException occurs while communicating with the namenode; +
    5. Another balancer is running. +
    + +

Upon exit, a balancer returns an exit code and prints one of the following
messages to the output file, corresponding to the above exit reasons:

      +
    1. The cluster is balanced. Exiting +
    2. No block can be moved. Exiting... +
    3. No block has been moved for 3 iterations. Exiting... +
    4. Received an IO exception: failure reason. Exiting... +
    5. Another balancer is running. Exiting... +
    + +

    The administrator can interrupt the execution of the balancer at any + time by running the command "stop-balancer.sh" on the machine where the + balancer is running.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + in]]> + + + + + + + out.]]> + + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + stream of bytes (of BLOCK_SIZE or less) + + This info is stored on a local disk. The DataNode + reports the table's contents to the NameNode upon startup + and every so often afterwards. + + DataNodes spend their lives in an endless loop of asking + the NameNode for something to do. A NameNode cannot connect + to a DataNode directly; a NameNode simply returns values from + functions invoked by a DataNode. + + DataNodes maintain an open server socket so that client code + or other DataNodes can read/write data. The host/port for + this server is reported to the NameNode, which then sends that + information to clients or other DataNodes that might be interested.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The tool scans all files and directories, starting from an indicated + root path. The following abnormal conditions are detected and handled:

    +
      +
    • files with blocks that are completely missing from all datanodes.
      + In this case the tool can perform one of the following actions: +
        +
      • none ({@link NamenodeFsck#FIXING_NONE})
      • +
      • move corrupted files to /lost+found directory on DFS + ({@link NamenodeFsck#FIXING_MOVE}). Remaining data blocks are saved as a + block chains, representing longest consecutive series of valid blocks.
      • +
      • delete corrupted files ({@link NamenodeFsck#FIXING_DELETE})
      • +
      +
    • +
    • detect files with under-replicated or over-replicated blocks
    • +
+ Additionally, the tool collects detailed overall DFS statistics, and
+ optionally can print detailed statistics on block locations and replication
+ factors of each file. The tool also provides an option to filter open files
+ during the scan.]]>
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + :/data[/] HTTP/1.1 + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #filesTotal}.set()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + :/listPaths[/][[&option]*] HTTP/1.1 + } + + Where option (default) in: + recursive ("no") + filter (".*") + exclude ("\..*\.crc") + + Response: A flat list of files/directories in the following format: + {@code +

    + + + + }]]> +
    + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The name-node can be started with one of the following startup options: +
      +
    • {@link FSConstants.StartupOption#REGULAR REGULAR} - normal startup
    • +
    • {@link FSConstants.StartupOption#FORMAT FORMAT} - format name node
    • +
    • {@link FSConstants.StartupOption#UPGRADE UPGRADE} - start the cluster + upgrade and create a snapshot of the current file system state
    • +
    • {@link FSConstants.StartupOption#ROLLBACK ROLLBACK} - roll the + cluster back to the previous state
    • +
    + The option is passed via configuration field: + dfs.namenode.startup + + The conf will be modified to reflect the actual ports on which + the NameNode is up and running if the user passes the port as + zero in the conf. + + @param conf confirguration + @throws IOException]]> +
    +
    + + + + zero.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + datanode whose + total size is size + + @param datanode on which blocks are located + @param size total size of blocks]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + blocksequence (namespace) + 2) block->machinelist ("inodes") + + The first table is stored on disk and is very precious. + The second table is rebuilt every time the NameNode comes + up. + + 'NameNode' refers to both this class as well as the 'NameNode server'. + The 'FSNamesystem' class actually performs most of the filesystem + management. The majority of the 'NameNode' class itself is concerned + with exposing the IPC interface to the outside world, plus some + configuration management. + + NameNode implements the ClientProtocol interface, which allows + clients to ask for DFS services. ClientProtocol is not + designed for direct use by authors of DFS client code. End-users + should instead use the org.apache.nutch.hadoop.fs.FileSystem class. + + NameNode also implements the DatanodeProtocol interface, used by + DataNode programs that actually store DFS data blocks. These + methods are invoked repeatedly and automatically by all the + DataNodes in a DFS deployment. + + NameNode also implements the NamenodeProtocol interface, used by + secondary namenodes or rebalancing processes to get partial namenode's + state, for example partial blocksMap etc.]]> + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The tool scans all files and directories, starting from an indicated + root path. The following abnormal conditions are detected and handled:

    +
      +
    • files with blocks that are completely missing from all datanodes.
      + In this case the tool can perform one of the following actions: +
        +
      • none ({@link #FIXING_NONE})
      • +
      • move corrupted files to /lost+found directory on DFS + ({@link #FIXING_MOVE}). Remaining data blocks are saved as a + block chains, representing longest consecutive series of valid blocks.
      • +
      • delete corrupted files ({@link #FIXING_DELETE})
      • +
      +
    • +
    • detect files with under-replicated or over-replicated blocks
    • +
    + Additionally, the tool collects a detailed overall DFS statistics, and + optionally can print detailed statistics on block locations and replication + factors of each file.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #syncs}.inc()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #blocksRead}.inc()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        dfs.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        dfs.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically. +

    + Name Node Status info is reported in another MBean + @see org.apache.hadoop.dfs.datanode.metrics.FSDatasetMBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Data Node runtime statistic info is report in another MBean + @see org.apache.hadoop.dfs.datanode.metrics.DataNodeStatisticsMBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Name Node runtime statistic info is report in another MBean + @see org.apache.hadoop.dfs.namenode.metrics.NameNodeStatisticsMBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        dfs.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        dfs.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically. +

    + Name Node Status info is report in another MBean + @see org.apache.hadoop.dfs.namenode.metrics.FSNamesystemMBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DistributedCache is a facility provided by the Map-Reduce + framework to cache files (text, archives, jars etc.) needed by applications. +

    + +

Applications specify the files to be cached via urls (hdfs:// or http://) in
the {@link JobConf}. The DistributedCache assumes that the files specified
via hdfs:// urls are already present on the {@link FileSystem} at the path
specified by the url.

    + +

    The framework will copy the necessary files on to the slave node before + any tasks for the job are executed on that node. Its efficiency stems from + the fact that the files are only copied once per job and the ability to + cache archives which are un-archived on the slaves.

    + +

    DistributedCache can be used to distribute simple, read-only + data/text files and/or more complex types such as archives, jars etc. + Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. + Jars may be optionally added to the classpath of the tasks, a rudimentary + software distribution mechanism. Files have execution permissions. + Optionally users can also direct it to symlink the distributed cache file(s) + into the working directory of the task.

    + +

    DistributedCache tracks modification timestamps of the cache + files. Clearly the cache files should not be modified by the application + or externally while the job is executing.

    + +

    Here is an illustrative example on how to use the + DistributedCache:

    +

    +     // Setting up the cache for the application
    +     
    +     1. Copy the requisite files to the FileSystem:
    +     
    +     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
    +     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
    +     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
    +     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
    +     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
    +     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
    +     
    +     2. Setup the application's JobConf:
    +     
    +     JobConf job = new JobConf();
    +     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
    +                                   job);
+     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
+     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
    +     
    +     3. Use the cached files in the {@link Mapper} or {@link Reducer}:
    +     
    +     public static class MapClass extends MapReduceBase  
    +     implements Mapper<K, V, K, V> {
    +     
    +       private Path[] localArchives;
    +       private Path[] localFiles;
    +       
    +       public void configure(JobConf job) {
    +         // Get the cached archives/files
    +         localArchives = DistributedCache.getLocalCacheArchives(job);
    +         localFiles = DistributedCache.getLocalCacheFiles(job);
    +       }
    +       
    +       public void map(K key, V value, 
    +                       OutputCollector<K, V> output, Reporter reporter) 
    +       throws IOException {
    +         // Use data from the cached archives/files here
    +         // ...
    +         // ...
    +         output.collect(k, v);
    +       }
    +     }
    +     
    + 

    + + @see JobConf + @see JobClient]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BufferedFSInputStream + with the specified buffer size, + and saves its argument, the input stream + in, for later use. An internal + buffer array of length size + is created and stored in buf. + + @param in the underlying input stream. + @param size the buffer size. + @exception IllegalArgumentException if size <= 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ']]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    +
    +
    +

    +

?
    Matches any single character.

*
    Matches zero or more characters.

[abc]
    Matches a single character from character set {a,b,c}.

[a-b]
    Matches a single character from the character range {a...b}. Note that
    character a must be lexicographically less than or equal to character b.

[^a]
    Matches a single character that is not from character set or range {a}.
    Note that the ^ character must occur immediately to the right of the
    opening bracket.

\c
    Removes (escapes) any special meaning of character c.

{ab,cd}
    Matches a string from the string set {ab, cd}.

{ab,c{de,fh}}
    Matches a string from the string set {ab, cde, cfh}.
    +
    +
+ @param pathPattern a regular expression specifying a path pattern
+ @return an array of paths that match the path pattern
+ @throws IOException]]>
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object. The + Hadoop DFS is a multi-machine system that appears as a single + disk. It's useful because of its fault tolerance and potentially + very large capacity. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is {@link DistributedFileSystem}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + buf at offset + and checksum into checksum. + The method is used for implementing read, therefore, it should be optimized + for sequential reading + @param pos chunkPos + @param buf desitination buffer + @param offset offset in buf at which to store data + @param len maximun number of bytes to read + @return number of bytes read]]> + + + + + + + + + + + + + + + + + -1 if the end of the + stream is reached. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + This method implements the general contract of the corresponding + {@link InputStream#read(byte[], int, int) read} method of + the {@link InputStream} class. As an additional + convenience, it attempts to read as many bytes as possible by repeatedly + invoking the read method of the underlying stream. This + iterated read continues until one of the following + conditions becomes true:

      + +
    • The specified number of bytes have been read, + +
    • The read method of the underlying stream returns + -1, indicating end-of-file. + +
    If the first read on the underlying stream returns + -1 to indicate end-of-file then this method returns + -1. Otherwise this method returns the number of bytes + actually read. + + @param b destination buffer. + @param off offset at which to start storing bytes. + @param len maximum number of bytes to read. + @return the number of bytes read, or -1 if the end of + the stream has been reached. + @exception IOException if an I/O error occurs. + ChecksumException if any checksum error occurs]]> +
    + + + + + + + + + + + + n bytes of data from the + input stream. + +

    This method may skip more bytes than are remaining in the backing + file. This produces no exception and the number of bytes skipped + may include some number of bytes that were beyond the EOF of the + backing file. Attempting to read from the stream after skipping past + the end will result in -1 indicating the end of the file. + +

    If n is negative, no bytes are skipped. + + @param n the number of bytes to be skipped. + @return the actual number of bytes skipped. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to skip to is corrupted]]> + + + + + + + This method may seek past the end of the file. + This produces no exception and an attempt to read from + the stream will result in -1 indicating the end of the file. + + @param pos the postion to seek to. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to seek to is corrupted]]> + + + + + + + + + + len bytes from + stm + + @param stm an input stream + @param buf destiniation buffer + @param offset offset at which to store data + @param len number of bytes to read + @return actual number of bytes read + @throws IOException if there is any IO error]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + len bytes from the specified byte array + starting at offset off and generate a checksum for + each data chunk. + +

    This method stores bytes from the given array into this + stream's buffer before it gets checksumed. The buffer gets checksumed + and flushed to the underlying output stream when all data + in a checksum chunk are in the buffer. If the buffer is empty and + requested length is at least as large as the size of next checksum chunk + size, this method will checksum and write the chunk directly + to the underlying output stream. Thus it avoids uneccessary data copy. + + @param b the data. + @param off the start offset in the data. + @param len the number of bytes to write. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trash feature. Files are moved to a user's trash + directory, a subdirectory of their home directory named ".Trash". Files are + initially moved to a current sub-directory of the trash directory. + Within that sub-directory their original path is preserved. Periodically + one may checkpoint the current trash and remove older checkpoints. (This + design permits trash management without enumeration of the full trash + content, without date support in the filesystem, and without clock + synchronization.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a tool for migrating data from an older to a newer version + of an S3 filesystem. +

    +

    + All files in the filesystem are migrated by re-writing the block metadata + - no datafiles are touched. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + Extracts AWS credentials from the filesystem URI or configuration. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A block-based {@link FileSystem} backed by + Amazon S3. +

    + @see NativeS3FileSystem]]> +
    +
    + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If f is a file, this method will make a single call to S3. + If f is a directory, this method will make a maximum of + (n / 1000) + 2 calls to S3, where n is the total number of + files and directories contained directly in f. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} for reading and writing files stored on + Amazon S3. + Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation + stores files on S3 in their + native form so they can be read by other S3 tools. +

    + @see org.apache.hadoop.fs.s3.S3FileSystem]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nth value.]]> + + + + + + + + + + + + + + + + + + + + + nth value in the file.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataInputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + DataInputBuffer buffer = new DataInputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using DataInput methods ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataOutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + DataOutputBuffer buffer = new DataOutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using DataOutput methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

+ Compared with ObjectWritable, this class is much more efficient, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

+ GenericWritable implements the {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing the {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines + the classes that will be wrapped in GenericObject in the application. + Attention: the classes defined in the getTypes() method must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new InputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + InputBuffer buffer = new InputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using InputStream methods ...
    + }
    + 
    + @see DataInputBuffer + @see DataOutput]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link IOException} or + null pointers. Must only be used for cleanup in exception handlers. + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key and + val. Returns true if such a pair exists and false when at + the end of the map]]> + + + + + + + + + + + + + + + + key or if it does not exist, at the first entry + after the named key. + +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof.]]> + + + + + + + + + key does not exist, return + the first entry that falls just before the key. Otherwise, + return the record that sorts just after. + @return - the key that was the closest match or null if eof.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new OutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + OutputBuffer buffer = new OutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using OutputStream methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    + @see DataOutputBuffer + @see InputBuffer]]> +
    +
    + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link Writer}, {@link Reader} and + {@link Sorter} classes for writing, reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
    1. + Writer : Uncompressed records. +
    2. +
    3. + RecordCompressWriter : Record-compressed files, only compress + values. +
    4. +
    5. + BlockCompressWriter : Block-compressed files, both keys & + values are collected in 'blocks' + separately and compressed. The size of + the 'block' is configurable. +
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

The recommended way is to use the static createWriter methods + provided by the SequenceFile to choose the preferred format.

    + +

    The {@link Reader} acts as the bridge and can read any of the above + SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
    • + version - 3 bytes of magic header SEQ, followed by 1 byte of actual + version number (e.g. SEQ4 or SEQ6) +
    • +
    • + keyClassName -key class +
    • +
    • + valueClassName - value class +
    • +
    • + compression - A boolean which specifies if compression is turned on for + keys/values in this file. +
    • +
    • + blockCompression - A boolean which specifies if block-compression is + turned on for keys/values in this file. +
    • +
    • + compression codec - CompressionCodec class which is used for + compression of keys and/or values (if compression is + enabled). +
    • +
    • + metadata - {@link Metadata} for this file. +
    • +
    • + sync - A sync marker to denote end of the header. +
    • +
    + +
    Uncompressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Value
      • +
      +
    • +
    • + A sync-marker every few 100 bytes or so. +
    • +
    + +
    Record-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Compressed Value
      • +
      +
    • +
    • + A sync-marker every few 100 bytes or so. +
    • +
    + +
    Block-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record Block +
        +
      • Compressed key-lengths block-size
      • +
      • Compressed key-lengths block
      • +
      • Compressed keys block-size
      • +
      • Compressed keys block
      • +
      • Compressed value-lengths block-size
      • +
      • Compressed value-lengths block
      • +
      • Compressed values block-size
      • +
      • Compressed values block
      • +
      +
    • +
    • + A sync-marker every few 100 bytes or so. +
    • +
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
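As an editorial illustration (a sketch, not part of the imported file), the following assumes the 2.x-era createWriter/Reader overloads and shows one record-compressed write/read cycle; the exact factory signatures may differ in other releases.
+     import org.apache.hadoop.conf.Configuration;
+     import org.apache.hadoop.fs.FileSystem;
+     import org.apache.hadoop.fs.Path;
+     import org.apache.hadoop.io.IntWritable;
+     import org.apache.hadoop.io.SequenceFile;
+     import org.apache.hadoop.io.Text;
+
+     public class SequenceFileExample {
+       public static void main(String[] args) throws Exception {
+         Configuration conf = new Configuration();
+         FileSystem fs = FileSystem.get(conf);
+         Path file = new Path("example.seq");
+
+         // RECORD compression => only values are compressed, as listed above.
+         SequenceFile.Writer writer = SequenceFile.createWriter(
+             fs, conf, file, IntWritable.class, Text.class,
+             SequenceFile.CompressionType.RECORD);
+         try {
+           for (int i = 0; i < 100; i++) {
+             writer.append(new IntWritable(i), new Text("record-" + i));
+           }
+         } finally {
+           writer.close();
+         }
+
+         // The Reader transparently handles any of the three formats.
+         SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
+         try {
+           IntWritable key = new IntWritable();
+           Text value = new Text();
+           while (reader.next(key, value)) {
+             System.out.println(key + "\t" + value);
+           }
+         } finally {
+           reader.close();
+         }
+       }
+     }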
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key, skipping its + value. True if another entry exists, and false at end of file.]]> + + + + + + + + key and + val. Returns true if such a pair exists and false when at + end of file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The position passed must be a position returned by {@link + SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + position, use {@link SequenceFile.Reader#sync(long)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SegmentDescriptor + @param segments the list of SegmentDescriptors + @param tmpDir the directory to write temporary files into + @return RawKeyValueIterator + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For best performance, applications should make sure that the {@link + Writable#readFields(DataInput)} implementation of their keys is + very efficient. In particular, it should avoid allocating memory.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This always returns a synchronized position. In other words, + immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + the key may be earlier in the file than key last written when this + method was called (e.g., with block-compression, it may be the first key + in the block that was being written when this method was called).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key. Returns + true if such a key exists and false when at the end of the set.]]> + + + + + + + key. + Returns key, or null if no match exists.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instatiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. 
+ @return byte position of the first occurence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a UTF8 with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + Also includes utilities for efficiently reading and writing UTF-8. + + @deprecated replaced by Text]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

+ + @param in DataInput to deserialize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +

    +     public class MyWritable implements Writable {
    +       // Some data     
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 

    ]]> +
    +
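A hedged usage sketch (not part of the imported file): round-tripping the MyWritable class from the example above through the DataInputBuffer/DataOutputBuffer classes documented earlier in this section.
+     import org.apache.hadoop.io.DataInputBuffer;
+     import org.apache.hadoop.io.DataOutputBuffer;
+
+     public class WritableRoundTrip {
+       public static void main(String[] args) throws Exception {
+         MyWritable original = new MyWritable();   // class from the example above
+
+         DataOutputBuffer out = new DataOutputBuffer();
+         original.write(out);                      // serialize
+
+         DataInputBuffer in = new DataInputBuffer();
+         in.reset(out.getData(), out.getLength());
+         MyWritable copy = MyWritable.read(in);    // deserialize a fresh instance
+       }
+     }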
    + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Example:

    +

    +     public class MyWritableComparable implements WritableComparable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable w) {
+         int thisValue = this.counter;
+         int thatValue = w.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +     }
    + 

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implemenation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. + @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-lzo library is loaded & initialized; + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lzo compression/decompression pair. 
+ http://www.oberhumer.com/opensource/lzo/]]> + + + + + + + + + + + + + + + + + + + + + true if lzo compressors are loaded & initialized, + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if lzo decompressors are loaded & initialized, + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-zlib is loaded & initialized + and can be loaded for this job, else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Keep trying a limited number of times, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    + + + + + + + + Keep trying for a maximum time, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
+ + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime multiplied by the number of tries so far. +

    ]]> +
    +
+ + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime multiplied by a random + number in the range of [0, 2 to the number of retries) +

    ]]> +
    +
    + + + + + + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + + + A retry policy for RemoteException + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + Try once, and fail by re-throwing the exception. + This corresponds to having no retry mechanism in place. +

    ]]> +
    +
    + + + + Try once, and fail silently for void methods, or by + re-throwing the exception for non-void methods. +

    ]]> +
    +
    + + + + Keep trying forever. +

    ]]> +
    +
    + + + A collection of useful implementations of {@link RetryPolicy}. +

    ]]> +
    +
    + + + + + + + + + + Determines whether the framework should retry a + method for the given exception, and the number + of retries that have been made for that operation + so far. +

    + @param e The exception that caused the method to fail. + @param retries The number of times the method has been retried. + @return true if the method should be retried, + false if the method should not be retried + but shouldn't fail with an exception (only for void methods). + @throws Exception The re-thrown exception e indicating + that the method failed and should not be retried further.]]> +
    +
    + + + Specifies a policy for retrying method failures. + Implementations of this interface should be immutable. +

    ]]> +
    +
    + + + + + + + + + + + + Create a proxy for an interface of an implementation class + using the same retry policy for each method in the interface. +

+ @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param retryPolicy the policy for retrying method call failures + @return the retry proxy]]> +
    +
+ + + + + + + Create a proxy for an interface of an implementation class + using a set of retry policies specified by method name. + If no retry policy is defined for a method then a default of + {@link RetryPolicies#TRY_ONCE_THEN_FAIL} is used. +

    + @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param methodNameToPolicyMap a map of method names to retry policies + @return the retry proxy]]> +
    +
    + + + A factory for creating retry proxies. +

    ]]> +
    +
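An illustrative sketch (not part of the imported file) of the retry facilities described above. RetryProxy#create and RetryPolicies#TRY_ONCE_THEN_FAIL follow the javadoc here; the retryUpToMaximumCountWithFixedSleep factory and the FlakyService interface are assumptions made for the example.
+     import java.util.HashMap;
+     import java.util.Map;
+     import java.util.concurrent.TimeUnit;
+
+     import org.apache.hadoop.io.retry.RetryPolicies;
+     import org.apache.hadoop.io.retry.RetryPolicy;
+     import org.apache.hadoop.io.retry.RetryProxy;
+
+     public class RetryExample {
+       // Hypothetical interface used only for illustration.
+       public interface FlakyService {
+         void submit(String payload) throws Exception;
+         String fetch(String id) throws Exception;
+       }
+
+       public static FlakyService wrap(FlakyService impl) {
+         // Retry each call up to 4 times, sleeping 10 seconds between attempts
+         // (assumed factory method).
+         RetryPolicy fixedSleep =
+             RetryPolicies.retryUpToMaximumCountWithFixedSleep(4, 10, TimeUnit.SECONDS);
+
+         // Per-method policies; unlisted methods fall back to
+         // RetryPolicies.TRY_ONCE_THEN_FAIL, as stated above.
+         Map<String, RetryPolicy> perMethod = new HashMap<String, RetryPolicy>();
+         perMethod.put("submit", fixedSleep);
+
+         return (FlakyService) RetryProxy.create(FlakyService.class, impl, perMethod);
+       }
+     }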
    + + + + + + + + + + Prepare the deserializer for reading.

    ]]> +
    +
    + + + + + + Deserialize the next object from the underlying input stream. + If the object t is non-null then this deserializer + may set its internal state to the next object read from the input + stream. Otherwise, if the object t is null a new + deserialized object will be created. +

    + @return the deserialized object]]> +
    +
    + + + + Close the underlying input stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for deserializing objects of type from an + {@link InputStream}. +

    + +

    + Deserializers are stateful, but must not buffer the input since + other producers may read from the input between calls to + {@link #deserialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link Deserializer} to deserialize + the objects to be compared so that the standard {@link Comparator} can + be used to compare them. +

    +

    + One may optimize compare-intensive operations by using a custom + implementation of {@link RawComparator} that operates directly + on byte representations. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + Encapsulates a {@link Serializer}/{@link Deserializer} pair. +

    + @param ]]> +
    +
    + + + + + + + Serializations are found by reading the io.serializations + property from conf, which is a comma-delimited list of + classnames. +

    ]]> +
    +
    + + + + + + + + + + + + A factory for {@link Serialization}s. +

    ]]> +
    +
    + + + + + + + + Prepare the serializer for writing.

    ]]> +
    +
    + + + + + Serialize t to the underlying output stream.

    ]]> +
    +
    + + + + Close the underlying output stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for serializing objects of type to an + {@link OutputStream}. +

    + +

    + Serializers are stateful, but must not buffer the output since + other producers may write to the output between calls to + {@link #serialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + +
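A hedged sketch (not part of the imported file) of the Serializer/Deserializer contract above. The open/serialize/deserialize/close calls follow this section's javadoc; looking the pair up through a SerializationFactory driven by io.serializations is an assumption about this package.
+     import java.io.ByteArrayInputStream;
+     import java.io.ByteArrayOutputStream;
+
+     import org.apache.hadoop.conf.Configuration;
+     import org.apache.hadoop.io.Text;
+     import org.apache.hadoop.io.serializer.Deserializer;
+     import org.apache.hadoop.io.serializer.SerializationFactory;
+     import org.apache.hadoop.io.serializer.Serializer;
+
+     public class SerializationExample {
+       public static void main(String[] args) throws Exception {
+         Configuration conf = new Configuration();
+         SerializationFactory factory = new SerializationFactory(conf);
+
+         Serializer<Text> serializer = factory.getSerializer(Text.class);
+         ByteArrayOutputStream out = new ByteArrayOutputStream();
+         serializer.open(out);                       // prepare for writing
+         serializer.serialize(new Text("hello"));    // write one object
+         serializer.close();
+
+         Deserializer<Text> deserializer = factory.getDeserializer(Text.class);
+         deserializer.open(new ByteArrayInputStream(out.toByteArray()));
+         Text copy = deserializer.deserialize(null); // null => create a new instance
+         deserializer.close();
+         System.out.println(copy);
+       }
+     }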
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + param, to the IPC server running at + address, returning the value. Throws exceptions if there are + network problems or if the remote code threw an exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unwraps any IOException. + + @param lookupTypes the desired exception class. + @return IOException, which is either the lookupClass exception or this.]]> + + + + + This unwraps any Throwable that has a constructor taking + a String as a parameter. + Otherwise it returns this. + + @return Throwable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + protocol is a Java interface. All parameters and return types must + be one of: + +
    • a primitive type, boolean, byte, + char, short, int, long, + float, double, or void; or
    • + +
    • a {@link String}; or
    • + +
    • a {@link Writable}; or
    • + +
    • an array of the above types
    + + All methods in the protocol should throw only IOException. No field data of + the protocol instance is transmitted.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + handlerCount determines + the number of handler threads that will be used to process calls.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
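For illustration (not part of the imported file): a hypothetical protocol interface that obeys the parameter rules listed above, using only primitives, String, a Writable, and an array of a permitted type, with every method declaring IOException. How a client proxy or server is obtained is not shown here.
+     import java.io.IOException;
+
+     import org.apache.hadoop.io.Text;
+
+     public interface BlockReportProtocol {
+       // Primitive and String parameters, Writable return value.
+       Text describeBlock(String poolId, long blockId) throws IOException;
+
+       // Array of a permitted type as the return value.
+       long[] listBlockIds(String poolId) throws IOException;
+
+       // void return type is also allowed.
+       void ping() throws IOException;
+     }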
    + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #rpcQueueTime}.inc(time)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        rpc.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker, + as {@link JobTracker.State} + + @return the current state of the JobTracker.]]> + + + + + + + + + + + + ClusterStatus provides clients with information such as: +

      +
    1. + Size of the cluster. +
    2. +
    3. + Task capacity of the cluster. +
    4. +
    5. + The number of currently running map & reduce tasks. +
    6. +
    7. + State of the JobTracker. +
    8. +

    + +

    Clients can query for the latest ClusterStatus, via + {@link JobClient#getClusterStatus()}.

    + + @see JobClient]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter can be of + any {@link Enum} type.

    + +

    Counters are bunched into {@link Group}s, each comprising of + counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Group of counters, comprising of counters from a particular + counter {@link Enum} class. + +

Group handles localization of the class name and the + counter names.

    ]]> +
    +
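A hedged sketch (not part of the imported file) of an application-defined counter group as described above: each enum constant becomes a Counter and the enum class becomes its Group. The mapper is hypothetical and uses the old org.apache.hadoop.mapred API that this documentation covers.
+     import java.io.IOException;
+
+     import org.apache.hadoop.io.LongWritable;
+     import org.apache.hadoop.io.Text;
+     import org.apache.hadoop.mapred.MapReduceBase;
+     import org.apache.hadoop.mapred.Mapper;
+     import org.apache.hadoop.mapred.OutputCollector;
+     import org.apache.hadoop.mapred.Reporter;
+
+     public class CountingMapper extends MapReduceBase
+         implements Mapper<LongWritable, Text, Text, LongWritable> {
+
+       // One Group ("RecordQuality") holding two Counters.
+       public enum RecordQuality { WELL_FORMED, MALFORMED }
+
+       public void map(LongWritable key, Text value,
+                       OutputCollector<Text, LongWritable> output, Reporter reporter)
+           throws IOException {
+         if (value.toString().isEmpty()) {
+           reporter.incrCounter(RecordQuality.MALFORMED, 1);
+           return;
+         }
+         reporter.incrCounter(RecordQuality.WELL_FORMED, 1);
+         output.collect(value, new LongWritable(1));
+       }
+     }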
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param fs the file system that the file is on + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobConf, int)}. + Subclasses of FileInputFormat can also override the + {@link #isSplitable(FileSystem, Path)} method to ensure input-files are + not split-up and are processed as a whole by {@link Mapper}s.]]> + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapred.output.dir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapred.output.dir}/_temporary/_${taskid} (only) + are promoted to ${mapred.output.dir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

    The application-writer can take advantage of this by creating any + side-files required in ${mapred.work.output.dir} during execution + of his reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the + framework will move them out similarly - thus she doesn't have to pick + unique paths per task-attempt.

    + +

    Note: the value of ${mapred.work.output.dir} during + execution of a particular task-attempt is actually + ${mapred.output.dir}/_temporary/_{$taskid}, and this value is + set by the map-reduce framework. So, just create any side-files in the + path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce + task to take advantage of this feature.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
    +
    + + + + + + + + + + + +
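A hedged sketch (not part of the imported file) of the side-file pattern above: write extra output under the per-attempt work directory so the framework can promote it to ${mapred.output.dir} on success. It assumes the old org.apache.hadoop.mapred API and a hypothetical file name.
+     import org.apache.hadoop.fs.FSDataOutputStream;
+     import org.apache.hadoop.fs.FileSystem;
+     import org.apache.hadoop.fs.Path;
+     import org.apache.hadoop.mapred.FileOutputFormat;
+     import org.apache.hadoop.mapred.JobConf;
+
+     public class SideFileHelper {
+       public static void writeSideFile(JobConf job, byte[] payload) throws Exception {
+         // ${mapred.work.output.dir} for this task attempt.
+         Path workDir = FileOutputFormat.getWorkOutputPath(job);
+         Path sideFile = new Path(workDir, "side-data.bin");   // hypothetical name
+
+         FileSystem fs = sideFile.getFileSystem(job);
+         FSDataOutputStream out = fs.create(sideFile);
+         try {
+           out.write(payload);
+         } finally {
+           out.close();
+         }
+       }
+     }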
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is used to validate the input directories when a job is + submitted so that the {@link JobClient} can fail early, with an useful + error message, in case of errors. For e.g. input directory does not exist. +

    + + @param job job configuration. + @throws InvalidInputException if the job does not have valid input + @deprecated getSplits is called in the client and can perform any + necessary validation of the input]]> +
    +
    + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For example, a split could + be a <input-file-path, start, offset> tuple. + + @param job job configuration. + @param numSplits the desired number of splits, a hint. + @return an array of {@link InputSplit}s for the job.]]>

    + + @param split the {@link InputSplit} + @param job the job that this split belongs to + @return a {@link RecordReader}]]> +
    +
    + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
    1. + Validate the input-specification of the job. +
    2. + Split-up the input file(s) into logical {@link InputSplit}s, each of + which is then assigned to an individual {@link Mapper}. +
    3. +
    4. + Provide the {@link RecordReader} implementation to be used to glean + input records from the logical InputSplit for processing by + the {@link Mapper}. +
    5. +
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

Clearly, logical splits based on input-size are insufficient for many + applications since record boundaries are to be respected. In such cases, the + application has to also implement a {@link RecordReader} on which lies the + responsibility to respect record-boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see JobClient + @see FileInputFormat]]> + + + + + + + + + + InputSplit. + + @return the number of bytes in the input split. + @throws IOException]]> + + + + + + InputSplit is + located as an array of Strings. + @throws IOException]]> + + + + InputSplit represents the data to be processed by an + individual {@link Mapper}. + +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + jobid doesn't correspond to any known job. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient is the primary interface for the user-job to interact + with the {@link JobTracker}. + + JobClient provides facilities to submit jobs, track their + progress, access component-tasks' reports/logs, get the Map-Reduce cluster + status information etc. + +

    The job submission process involves: +

      +
    1. + Checking the input and output specifications of the job. +
    2. +
    3. + Computing the {@link InputSplit}s for the job. +
    4. +
    5. + Setup the requisite accounting information for the {@link DistributedCache} + of the job, if necessary. +
    6. +
    7. + Copying the job's jar and configuration to the map-reduce system directory + on the distributed file-system. +
    8. +
    9. + Submitting the job to the JobTracker and optionally monitoring + it's status. +
    10. +

    + + Normally the user creates the application, describes various facets of the + job via {@link JobConf} and then uses the JobClient to submit + the job and monitor its progress. + +

    Here is an example on how to use JobClient:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     job.setInputPath(new Path("in"));
    +     job.setOutputPath(new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +
    +     // Submit the job, then poll for progress until the job is complete
    +     JobClient.runJob(job);
    + 

    + +

    Job Control

    + +

    At times clients would chain map-reduce jobs to accomplish complex tasks + which cannot be done via a single map-reduce job. This is fairly easy since + the output of the job, typically, goes to distributed file-system and that + can be used as the input for the next job.

    + +

    However, this also means that the onus on ensuring jobs are complete + (success/failure) lies squarely on the clients. In such situations the + various job-control options are: +

      +
    1. + {@link #runJob(JobConf)} : submits the job and returns only after + the job has completed. +
    2. +
    3. + {@link #submitJob(JobConf)} : only submits the job, then poll the + returned handle to the {@link RunningJob} to query status and make + scheduling decisions. +
    4. +
    5. + {@link JobConf#setJobEndNotificationURI(String)} : setup a notification + on job-completion, thus avoiding polling. +
    6. +

    + + @see JobConf + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if framework should keep the intermediate files + for failed tasks, false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: +

    + @param dir the {@link Path} of the output directory for the map-reduce job.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the outputs of the maps are to be compressed, + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This comparator should be provided if the equivalence rules for keys + for sorting the intermediates are different from those for grouping keys + before each call to + {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.

    + +

    For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed + in a single call to the reduce function if K1 and K2 compare as equal.

    + +

    Since {@link #setOutputKeyComparatorClass(Class)} can be used to control + how keys are sorted, this can be used in conjunction to simulate + secondary sort on values.

    + +

    Note: This is not a guarantee of the reduce sort being + stable in any sense. (In any case, with the order of available + map-outputs to the reduce being non-deterministic, it wouldn't make + that much sense.)

    + + @param theClass the comparator class to be used for grouping keys. + It should implement RawComparator. + @see #setOutputKeyComparatorClass(Class)]]> +
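    A minimal sketch of wiring the two comparators together for a secondary sort (illustrative, not from the original javadoc; FullKeyComparator and FirstPartComparator are hypothetical RawComparator implementations):

    +     // Sort the intermediate outputs on the full composite key ...
    +     job.setOutputKeyComparatorClass(FullKeyComparator.class);
    +     // ... but group values into a single reduce() call by the first part only,
    +     // giving a secondary sort on the remainder of the key.
    +     job.setOutputValueGroupingComparator(FirstPartComparator.class);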
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. Typically the combiner is same as the + the {@link Reducer} for the job i.e. {@link #getReducerClass()}. + + @return the user-defined combiner class used to combine map-outputs.]]> + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. + +

    The combiner is a task-level aggregation operation which, in some cases, + helps to cut down the amount of data transferred from the {@link Mapper} to + the {@link Reducer}, leading to better performance.

    + +

    Typically the combiner is the same as the Reducer for the job, i.e. {@link #setReducerClass(Class)}.

    + + @param theClass the user-defined combiner class used to combine + map-outputs.]]> +
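    A minimal sketch of wiring in a combiner (illustrative only; WordCountReducer is a hypothetical Reducer whose aggregation is associative and commutative, so it is safe to run map-side):

    +     job.setReducerClass(WordCountReducer.class);
    +     // Run the same aggregation on the map side to shrink the shuffle
    +     job.setCombinerClass(WordCountReducer.class);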
    +
    + + + + + + + + + + + true. + + @return true if speculative execution be used for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on, else false.]]> + + + + + true. + + @return true if speculative execution be + used for this job for map tasks, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for map tasks, + else false.]]> + + + + + true. + + @return true if speculative execution be used + for reduce tasks for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for reduce tasks, + else false.]]> + + + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + Note: This is only a hint to the framework. The actual + number of spawned map tasks depends on the number of {@link InputSplit}s + generated by the job's {@link InputFormat#getSplits(JobConf, int)}. + + A custom {@link InputFormat} is typically used to accurately control + the number of map tasks for the job.

    + +

    How many maps?

    + +

    The number of maps is usually driven by the total size of the inputs + i.e. total number of blocks of the input files.

    + +

    The right level of parallelism for maps seems to be around 10-100 maps per node, although it has been set to 300 or so for very CPU-light map tasks. Task setup takes a while, so it is best if the maps take at least a minute to execute.

    + +

    The default behavior of file-based {@link InputFormat}s is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of input files. However, the {@link FileSystem} blocksize of the + input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

    Thus, if you expect 10TB of input data and have a blocksize of 128MB, + you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is + used to set it even higher.

    + + @param n the number of map tasks for this job. + @see InputFormat#getSplits(JobConf, int) + @see FileInputFormat + @see FileSystem#getDefaultBlockSize() + @see FileStatus#getBlockSize()]]> +
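    A back-of-the-envelope sketch of the arithmetic above (illustrative only; job is an already-constructed JobConf):

    +     long inputBytes = 10L * 1024 * 1024 * 1024 * 1024;  // 10 TB of input
    +     long blockSize  = 128L * 1024 * 1024;                // 128 MB HDFS blocks
    +     long maps       = inputBytes / blockSize;            // 81,920 map tasks
    +     job.setNumMapTasks((int) maps);                      // only a hint to the framework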
    +
    + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + How many reduces? + +

    The right number of reduces seems to be 0.95 or + 1.75 multiplied by (<no. of nodes> * + + mapred.tasktracker.reduce.tasks.maximum). +

    + +

    With 0.95 all of the reduces can launch immediately and start transferring map outputs as the maps finish. With 1.75 the faster nodes will finish their first round of reduces and launch a second wave of reduces, doing a much better job of load balancing.

    + +

    Increasing the number of reduces increases the framework overhead, but improves load balancing and lowers the cost of failures.

    + +

    The scaling factors above are slightly less than whole numbers to + reserve a few reduce slots in the framework for speculative-tasks, failures + etc.

    + +

    Reducer NONE

    + +

    It is legal to set the number of reduce-tasks to zero.

    + +

    In this case the output of the map-tasks goes directly to the distributed file-system, to the path set by {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the framework doesn't sort the map-outputs before writing them out to HDFS.

    + + @param n the number of reduce tasks for this job.]]> +
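    A sketch of the 0.95/1.75 heuristics above (illustrative only; the node and slot counts are made-up values):

    +     int nodes = 20;
    +     int reduceSlotsPerNode = 2;   // mapred.tasktracker.reduce.tasks.maximum
    +     int singleWave = (int) (0.95 * nodes * reduceSlotsPerNode);  // 38 reduces
    +     int doubleWave = (int) (1.75 * nodes * reduceSlotsPerNode);  // 70 reduces
    +     job.setNumReduceTasks(singleWave);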
    +
    + + + mapred.map.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per map task.]]> + + + + + + + + + + + mapred.reduce.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per reduce task.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + noFailures, the + tasktracker is blacklisted for this job. + + @param noFailures maximum no. of failures of a given job per tasktracker.]]> + + + + + blacklisted for this job. + + @return the maximum no. of failures of a given job per tasktracker.]]> + + + + + failed. + + Defaults to zero, i.e. any failed map-task results in + the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + failed. + + Defaults to zero, i.e. any failed reduce-task results + in the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The debug script can aid debugging of failed map tasks. The script is + given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the map failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script needs to be symlinked.

    + +

    Here is an example of how to submit a script:

    + job.setMapDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param mDbgScript the script name]]> +
    +
    + + + + + + + + + The debug script can aid debugging of failed reduce tasks. The script + is given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the reduce failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} APIs. The script file needs to be symlinked.

    + +

    Here is an example of how to submit a script:

    + job.setReduceDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param rDbgScript the script name]]> +
    +
    + + + + + + + + null if it hasn't + been set. + @see #setJobEndNotificationURI(String)]]> + + + + + + The uri can contain 2 special parameters: $jobId and + $jobStatus. Those, if present, are replaced by the job's + identifier and completion-status respectively.

    + +

    This is typically used by application-writers to implement chaining of + Map-Reduce jobs in an asynchronous manner.

    + + @param uri the job end notification uri + @see JobStatus + @see Job Completion and Chaining]]> +
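    A minimal sketch of registering such a notification instead of polling (illustrative only; the host and path are made up):

    +     job.setJobEndNotificationURI(
    +         "http://workflow.example.com/notify?jobid=$jobId&status=$jobStatus");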
    +
    + + + + When a job starts, a shared directory is created at location + + ${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ . + This directory is exposed to the users through + job.local.dir . + So, the tasks can use this space + as scratch space and share files among them.

    This value is also available as a system property.

    @return The localized job specific shared directory]]>
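    A minimal sketch of a task picking up the shared scratch directory (illustrative only):

    +     // From the task's JobConf ...
    +     String jobLocalDir = job.get("job.local.dir");
    +     // ... or from the system property set for the task JVM
    +     String fromProperty = System.getProperty("job.local.dir");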
    +
    + + JobConf is the primary interface for a user to describe a + map-reduce job to the Hadoop framework for execution. The framework tries to + faithfully execute the job as-is described by JobConf, however: +
    1. Some configuration parameters might have been marked as final by administrators and hence cannot be altered.
    2. While some job parameters are straight-forward to set (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly with the rest of the framework and/or the job configuration and are relatively more complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}).

    + +

    JobConf typically specifies the {@link Mapper}, combiner + (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and + {@link OutputFormat} implementations to be used etc. + +

    Optionally JobConf is used to specify other advanced facets of the job such as the Comparators to be used, files to be put in the {@link DistributedCache}, whether or not intermediate and/or job outputs are to be compressed (and how), and debuggability via user-provided scripts ({@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}) for doing post-processing on task logs, task's stdout, stderr, syslog, etc.

    + +

    Here is an example of how to configure a job via JobConf:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     FileInputFormat.setInputPaths(job, new Path("in"));
    +     FileOutputFormat.setOutputPath(job, new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setCombinerClass(MyJob.MyReducer.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +     
    +     job.setInputFormat(SequenceFileInputFormat.class);
    +     job.setOutputFormat(SequenceFileOutputFormat.class);
    + 

    + + @see JobClient + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + any job + run on the jobtracker started at 200707121733, we would use : +
     
    + JobID.getTaskIDsPattern("200707121733", null);
    + 
    + which will return : +
     "job_200707121733_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @return a regex pattern matching JobIDs]]> +
    +
    + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID + @see JobTracker#getNewJobId() + @see JobTracker#getStartTime()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -archives + -files inputjar args]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + zero. + + @param conf configuration for the JobTracker. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.file.buffer.size specified in the given + Configuration. + @param in input stream + @param conf configuration + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Output pairs need not be of the same types as input pairs. A given + input pair may map to zero or many output pairs. Output pairs are + collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the input key. + @param value the input value. + @param output collects mapped keys and values. + @param reporter facility to report progress.]]> +
    Maps are the individual tasks which transform input records into intermediate records. The transformed intermediate records need not be of the same type as the input records. A given input pair may map to zero or many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each {@link InputSplit} generated by the {@link InputFormat} for the job. Mapper implementations can access the {@link JobConf} for the job via the {@link JobConfigurable#configure(JobConf)} method and initialize themselves. Similarly they can use the {@link Closeable#close()} method for de-initialization.

    + +

    The framework then calls + {@link #map(Object, Object, OutputCollector, Reporter)} + for each key/value pair in the InputSplit for that task.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the grouping by specifying + a Comparator via + {@link JobConf#setOutputKeyComparatorClass(Class)}.

    + +

    The grouped Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    The intermediate, grouped outputs are always stored in + {@link SequenceFile}s. Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the JobConf.

    + +

    If the job has + zero + reduces then the output of the Mapper is directly written + to the {@link FileSystem} without grouping by keys.

    + +

    Example:

    +

    +     public class MyMapper<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Mapper<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +       
    +       private String mapTaskId;
    +       private String inputFile;
    +       private int noRecords = 0;
    +       
    +       public void configure(JobConf job) {
    +         mapTaskId = job.get("mapred.task.id");
    +         inputFile = job.get("mapred.input.file");
    +       }
    +       
    +       public void map(K key, V val,
    +                       OutputCollector<K, V> output, Reporter reporter)
    +       throws IOException {
    +         // Process the <key, value> pair (assume this takes a while)
    +         // ...
    +         // ...
    +         
    +         // Let the framework know that we are alive, and kicking!
    +         // reporter.progress();
    +         
    +         // Process some more
    +         // ...
    +         // ...
    +         
    +         // Increment the no. of <key, value> pairs processed
    +         ++noRecords;
    +
    +         // Increment counters
    +         reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
    +        
    +         // Every 100 records update application-level status
    +         if ((noRecords%100) == 0) {
    +           reporter.setStatus(mapTaskId + " processed " + noRecords + 
    +                              " from input-file: " + inputFile); 
    +         }
    +         
    +         // Output the result
    +         output.collect(key, val);
    +       }
    +     }
    + 

    + +

    Applications may write a custom {@link MapRunnable} to exert greater + control on map processing e.g. multi-threaded Mappers etc.

    + + @see JobConf + @see InputFormat + @see Partitioner + @see Reducer + @see MapReduceBase + @see MapRunnable + @see SequenceFile]]> +
    +
    Provides default no-op implementations for a few methods; most non-trivial applications need to override some of them.

    ]]> +
    +
    + + + + + + + + + + + <key, value> pairs. + +

    Mapping of input records to output records is complete when this method + returns.

    @param input the {@link RecordReader} to read the input records.
    @param output the {@link OutputCollector} to collect the output records.
    @param reporter {@link Reporter} to report progress, status-updates etc.
    @throws IOException]]>
    +
    + + Custom implementations of MapRunnable can exert greater + control on map processing e.g. multi-threaded, asynchronous mappers etc.

    + + @see Mapper]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nearly + equal content length.
    + Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} + to construct RecordReader's for MultiFileSplit's. + @see MultiFileSplit]]> +
    +
    + + + + + + + + + + + + + + + + + th Path]]> + + + + + + + + + + + th Path]]> + + + + + + + + + + + + + + + + + + + + + + + MultiFileSplit can be used to implement {@link RecordReader}'s, with + reading one record per file. + @see FileSplit + @see MultiFileInputFormat]]> + + + + + + + + + + + + + + + <key, value> pairs output by {@link Mapper}s + and {@link Reducer}s. + +

    OutputCollector is the generalization of the facility + provided by the Map-Reduce framework to collect data output by either the + Mapper or the Reducer i.e. intermediate outputs + or the output of the job.

    ]]> +
    +
    This is to validate the output specification for the job when it is submitted. Typically it checks that the output does not already exist, throwing an exception when it does, so that output is not overwritten.

    + + @param ignored + @param job job configuration. + @throws IOException when output should not be attempted]]> +
    +
    + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
    1. Validate the output-specification of the job. For example, check that the output directory doesn't already exist.
    2. Provide the {@link RecordWriter} implementation to be used to write out the output files of the job. Output files are stored in a {@link FileSystem}.
    + + @see RecordWriter + @see JobConf]]> +
    +
    + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Typically a hash function on a all or a subset of the key.

    @param key the key to be partitioned.
    @param value the entry value.
    @param numPartitions the total number of partitions.
    @return the partition number for the key.]]>
    +
    Partitioner controls the partitioning of the keys of the intermediate map-outputs. The key (or a subset of the key) is used to derive the partition, typically by a hash function. The total number of partitions is the same as the number of reduce tasks for the job. Hence this controls which of the m reduce tasks the intermediate key (and hence the record) is sent to for reduction.

    + + @see Reducer]]> +
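    A minimal sketch of a custom Partitioner that derives the partition from only part of the key (illustrative only; the tab-separated key layout and class name are assumptions):

    +     public class FirstFieldPartitioner implements Partitioner<Text, Text> {
    +       public void configure(JobConf job) { }
    +
    +       public int getPartition(Text key, Text value, int numPartitions) {
    +         String firstField = key.toString().split("\t")[0];
    +         // Mask the sign bit so the result is always in [0, numPartitions)
    +         return (firstField.hashCode() & Integer.MAX_VALUE) % numPartitions;
    +       }
    +     }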
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.0 to 1.0. + @throws IOException]]> + + + + RecordReader reads <key, value> pairs from an + {@link InputSplit}. + +

    RecordReader, typically, converts the byte-oriented view of + the input, provided by the InputSplit, and presents a + record-oriented view for the {@link Mapper} & {@link Reducer} tasks for + processing. It thus assumes the responsibility of processing record + boundaries and presenting the tasks with keys and values.

    + + @see InputSplit + @see InputFormat]]> +
    +
    + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param reporter facility to report progress. + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + Reduces values for a given key. + +

    The framework calls this method for each + <key, (list of values)> pair in the grouped inputs. + Output values must be of the same type as input values. Input keys must + not be altered. The framework will reuse the key and value objects + that are passed into the reduce, therefore the application should clone + the objects they want to keep a copy of. In many cases, all values are + combined into zero or one value. +

    + +

    Output pairs are collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the key. + @param values the list of values to reduce. + @param output to collect keys and combined values. + @param reporter facility to report progress.]]> +
    + + + The number of Reducers for the job is set by the user via + {@link JobConf#setNumReduceTasks(int)}. Reducer implementations + can access the {@link JobConf} for the job via the + {@link JobConfigurable#configure(JobConf)} method and initialize themselves. + Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    Reducer has 3 primary phases:

    +
      +
    1. + +

      Shuffle

      + +

    Reducer is input the grouped output of a {@link Mapper}. In this phase the framework, for each Reducer, fetches the relevant partition of the output of all the Mappers, via HTTP.

      +
    2. + +
    3. +

      Sort

      + +

      The framework groups Reducer inputs by keys + (since different Mappers may have output the same key) in this + stage.

      + +

      The shuffle and sort phases occur simultaneously i.e. while outputs are + being fetched they are merged.

      + +
      SecondarySort
      + +

    If the equivalence rules for keys while grouping the intermediates are different from those for grouping keys before reduction, then one may specify a Comparator via {@link JobConf#setOutputValueGroupingComparator(Class)}. Since {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to control how intermediate keys are grouped, these can be used in conjunction to simulate secondary sort on values.

      + + + For example, say that you want to find duplicate web pages and tag them + all with the url of the "best" known example. You would set up the job + like: +
        +
      • Map Input Key: url
      • +
      • Map Input Value: document
      • +
      • Map Output Key: document checksum, url pagerank
      • +
      • Map Output Value: url
      • +
      • Partitioner: by checksum
      • +
      • OutputKeyComparator: by checksum and then decreasing pagerank
      • +
      • OutputValueGroupingComparator: by checksum
      • +
      +
    4. + +
    5. +

      Reduce

      + +

      In this phase the + {@link #reduce(Object, Iterator, OutputCollector, Reporter)} + method is called for each <key, (list of values)> pair in + the grouped inputs.

      +

      The output of the reduce task is typically written to the + {@link FileSystem} via + {@link OutputCollector#collect(Object, Object)}.

      +
    6. +
    + +

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

    +     public class MyReducer<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Reducer<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +        
    +       private String reduceTaskId;
    +       private int noKeys = 0;
    +       
    +       public void configure(JobConf job) {
    +         reduceTaskId = job.get("mapred.task.id");
    +       }
    +       
    +       public void reduce(K key, Iterator<V> values,
    +                          OutputCollector<K, V> output, 
    +                          Reporter reporter)
    +       throws IOException {
    +       
    +         // Process
    +         int noValues = 0;
    +         while (values.hasNext()) {
    +           V value = values.next();
    +           
    +           // Increment the no. of values for this key
    +           ++noValues;
    +           
    +           // Process the <key, value> pair (assume this takes a while)
    +           // ...
    +           // ...
    +           
    +           // Let the framework know that we are alive, and kicking!
    +           if ((noValues%10) == 0) {
    +             reporter.progress();
    +           }
    +         
    +           // Process some more
    +           // ...
    +           // ...
    +           
    +           // Output the <key, value> 
    +           output.collect(key, value);
    +         }
    +         
    +         // Increment the no. of <key, list of values> pairs processed
    +         ++noKeys;
    +         
    +         // Increment counters
    +         reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
    +         
    +         // Every 100 keys update application-level status
    +         if ((noKeys%100) == 0) {
    +           reporter.setStatus(reduceTaskId + " processed " + noKeys);
    +         }
    +       }
    +     }
    + 

    + + @see Mapper + @see Partitioner + @see Reporter + @see MapReduceBase]]> +
    +
    + + + + + + + + + + + + + + + Enum. + @param amount A non-negative amount by which the counter is to + be incremented.]]> + + + + + + + + + + + + + + InputSplit that the map is reading from. + @throws UnsupportedOperationException if called outside a mapper]]> + + + + + + + + + {@link Mapper} and {@link Reducer} can use the Reporter + provided to report progress or just indicate that they are alive. In + scenarios where the application takes an insignificant amount of time to + process individual key/value pairs, this is crucial since the framework + might assume that the task has timed-out and kill that task. + +

    Applications can also update {@link Counters} via the provided + Reporter .

    + + @see Progressable + @see Counters]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RunningJob is the user-interface to query for details on a + running Map-Reduce job. + +

    Clients can get hold of RunningJob via the {@link JobClient} + and then query the running-job for details such as name, configuration, + progress etc.

    + + @see JobClient]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This allows the user to specify the key class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + This allows the user to specify the value class to be different + from the actual class ({@link BytesWritable}) used for writing

    @param conf the {@link JobConf} to modify
    @param theClass the SequenceFile output value class.]]>
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f. The filtering criteria is + MD5(key) % f == 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + f using + the criteria record# % f == 0. + For example, if the frequency is 10, one out of 10 records is returned.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . + @param name The name of the server + @param port The port to use on the server + @param findPort whether the server should start at the given port and + increment by 1 until it finds a free port.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + points to the log directory + "/static/" -> points to common static files (src/webapps/static) + "/" -> the jsp server code from (src/webapps/)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all task attempt IDs + of any jobtracker, in any job, of the first + map task, we would use : +
     
    + TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
    + 
    + which will return : +
     "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @param attemptId the task attempt number, or null + @return a regex pattern matching TaskAttemptIDs]]> +
    +
    + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the first map task + of any jobtracker, of any job, we would use : +

     
    + TaskID.getTaskIDsPattern(null, null, true, 1);
    + 
    + which will return : +
     "task_[^_]*_[0-9]*_m_000001*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @return a regex pattern matching TaskIDs]]> +
    + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.log.dir.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the Job was added.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([,]*) + func ::= tbl(,"") + class ::= @see java.lang.Class#forName(java.lang.String) + path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) + } + Reads expression from the mapred.join.expr property and + user-supplied join types from mapred.join.define.<ident> + types. Paths supplied to tbl are given as input paths to the + InputFormat class listed. + @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,

    ) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + mapred.join.define.<ident> to a classname. In the expression + mapred.join.expr, the identifier will be assumed to be a + ComposableRecordReader. + mapred.join.keycomparator can be a classname used to compare keys + in the join. + @see JoinRecordReader + @see MultiFilterRecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + capacity children to position + id in the parent reader. + The id of a root CompositeRecordReader is -1 by convention, but relying + on this is not recommended.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + override(S1,S2,S3) will prefer values + from S3 over S2, and values from S2 over S1 for all keys + emitted from all sources.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [,,...,]]]> + + + + + + + out. + TupleWritable format: + {@code + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used instead of the default implementation, + @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + bound in order to improve throughput. +

    + Map implementations using this MapRunnable must be thread-safe. +

    The Map-Reduce job has to be configured to use this MapRunnable class (using the JobConf.setMapRunnerClass method) and the number of threads the thread-pool can use with the mapred.map.multithreadedrunner.threads property; its default value is 10 threads.
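    A minimal sketch of that configuration (illustrative only; MyJob is a placeholder class):

    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     job.setMapRunnerClass(MultithreadedMapRunner.class);
    +     // Use 20 map threads per task instead of the default 10
    +     job.setInt("mapred.map.multithreadedrunner.threads", 20);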

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + pairs. Uses + {@link StringTokenizer} to break text into tokens.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + generateKeyValPairs(Object key, Object value); public void + configure(JobConfjob); } + + The package also provides a base class, ValueAggregatorBaseDescriptor, + implementing the above interface. The user can extend the base class and + implement generateKeyValPairs accordingly. + + The primary work of generateKeyValPairs is to emit one or more key/value + pairs based on the input key/value pair. The key in an output key/value pair + encode two pieces of information: aggregation type and aggregation id. The + value will be aggregated onto the aggregation id according the aggregation + type. + + This class offers a function to generate a map/reduce job using Aggregate + framework. The function takes the following parameters: input directory spec + input format (text or sequence file) output directory a file specifying the + user plugin class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When constructing the instance, if the factory property + contextName.class exists, + its value is taken to be the name of the class to instantiate. Otherwise, + the default is to create an instance of + org.apache.hadoop.metrics.spi.NullContext, which is a + dummy "no-op" context which will cause all metric data to be discarded. + + @param contextName the name of the context + @return the named MetricsContext]]> + + + + + + + + + + + + + + When the instance is constructed, this method checks if the file + hadoop-metrics.properties exists on the class path. If it + exists, it must be in the format defined by java.util.Properties, and all + the properties in the file are set as attributes on the newly created + ContextFactory instance. 
+ + @return the singleton ContextFactory instance]]> + + + + getFactory() method.]]> + + + + + + + + + + + + + + + + + + + startMonitoring() again after calling + this. + @see #close()]]> + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A record name identifies the kind of data to be reported. For example, a + program reporting statistics relating to the disks on a computer might use + a record name "diskStats".

    + + A record has zero or more tags. A tag has a name and a value. To + continue the example, the "diskStats" record might use a tag named + "diskName" to identify a particular disk. Sometimes it is useful to have + more than one tag, so there might also be a "diskType" with value "ide" or + "scsi" or whatever.

    + + A record also has zero or more metrics. These are the named + values that are to be reported to the metrics system. In the "diskStats" + example, possible metric names would be "diskPercentFull", "diskPercentBusy", + "kbReadPerSecond", etc.

    + + The general procedure for using a MetricsRecord is to fill in its tag and + metric values, and then call update() to pass the record to the + client library. + Metric data is not immediately sent to the metrics system + each time that update() is called. + An internal table is maintained, identified by the record name. This + table has columns + corresponding to the tag and the metric names, and rows + corresponding to each unique set of tag values. An update + either modifies an existing row in the table, or adds a new row with a set of + tag values that are different from all the other rows. Note that if there + are no tags, then there can be at most one row in the table.
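    A minimal sketch of that procedure (illustrative only; the context, record, tag and metric names are made up, and MetricsUtil is assumed to resolve a configured context):

    +     MetricsContext context = MetricsUtil.getContext("diskStats");
    +     MetricsRecord record = MetricsUtil.createRecord(context, "diskStats");
    +     record.setTag("diskName", "sda");
    +     record.setMetric("diskPercentFull", 87);
    +     record.update();   // queued; sent to the metrics system on the next timer period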

    + + Once a row is added to the table, its data will be sent to the metrics system + on every timer period, whether or not it has been updated since the previous + timer period. If this is inappropriate, for example if metrics were being + reported by some transient object in an application, the remove() + method can be used to remove the row and thus stop the data from being + sent.

    + + Note that the update() method is atomic. This means that it is + safe for different threads to be updating the same metric. More precisely, + it is OK for different threads to call update() on MetricsRecord instances + with the same set of tag names and tag values. Different threads should + not use the same MetricsRecord instance at the same time.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MetricsContext.registerUpdater().]]> + + + + + + + + + + + + + + + + + + + + + + + + + fileName attribute, + if specified. Otherwise the data will be written to standard + output.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is configured by setting ContextFactory attributes which in turn + are usually configured through a properties file. All the attributes are + prefixed by the contextName. For example, the properties file might contain: +

    + myContextName.fileName=/tmp/metrics.log
    + myContextName.period=5
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + contextName.tableName. The returned map consists of + those attributes with the contextName and tableName stripped off.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class implements the internal table of metric data, and the timer + on which data is to be sent to the metrics system. Subclasses must + override the abstract emitRecord method in order to transmit + the data.

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + update + and remove().]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + ,name=" + Where the and are the supplied parameters + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.rpc.socket.factory.class.<ClassName>. When no + such parameter exists then fall back on the default socket factory as + configured by hadoop.rpc.socket.factory.class.default. If + this default socket factory is not configured, then fall back on the JVM + default socket factory. + + @param conf the configuration + @param clazz the class (usually a {@link VersionedProtocol}) + @return a socket factory]]> + + + + + + hadoop.rpc.socket.factory.default + + @param conf the configuration + @return the default socket factory as specified in the configuration or + the JVM default socket factory if the configuration does not + contain a default socket factory property.]]> + + + + + + + + + + + + + : + ://:/]]> + + + + + + + + : + ://:/]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + From documentation for {@link #getInputStream(Socket, long)}:
    Returns the InputStream for the socket. If the socket has an associated SocketChannel then it returns a {@link SocketInputStream} with the given timeout. If the socket does not have a channel, {@link Socket#getInputStream()} is returned. In the latter case, the timeout argument is ignored and the timeout set with {@link Socket#setSoTimeout(int)} applies for reads.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see #getInputStream(Socket, long) + + @param socket + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + +
    + + From documentation for {@link #getOutputStream(Socket, long)} :
    Returns the OutputStream for the socket. If the socket has an associated SocketChannel then it returns a {@link SocketOutputStream} with the given timeout. If the socket does not have a channel, {@link Socket#getOutputStream()} is returned. In the latter case, the timeout argument is ignored and the write will wait until data is available.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see #getOutputStream(Socket, long) + + @param socket + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + node + + @param node + a node + @return true if node is already in the tree; false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + scope + if scope starts with ~, choose one from the all nodes except for the + ones in scope; otherwise, choose one from scope + @param scope range of nodes from which a node will be choosen + @return the choosen node]]> + + + + + + + scope but not in excludedNodes + if scope starts with ~, return the number of nodes that are not + in scope and excludedNodes; + @param scope a path string that may start with ~ + @param excludedNodes a list of nodes + @return number of available nodes]]> + + + + + + + + + + + + reader + It linearly scans the array, if a local node is found, swap it with + the first element of the array. + If a local rack node is found, swap it with the first element following + the local node. + If neither local node or local rack node is found, put a random replica + location at postion 0. + It leaves the rest nodes untouched.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Create a new input stream with the given timeout. If the timeout is zero, it will be treated as infinite timeout. The socket's channel will be configured to be non-blocking.

    @see SocketInputStream#SocketInputStream(ReadableByteChannel, long)

    @param socket should have a channel associated with it.
    @param timeout timeout in milliseconds. must not be negative.
    @throws IOException]]>
    +
    + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + +
    Create a new output stream with the given timeout. If the timeout is zero, it will be treated as infinite timeout. The socket's channel will be configured to be non-blocking.

    @see SocketOutputStream#SocketOutputStream(WritableByteChannel, long)

    @param socket should have a channel associated with it.
    @param timeout timeout in milliseconds. must not be negative.
    @throws IOException]]>
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = getCount(). + @param newCapacity The new capacity in bytes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index idx = startVector(...); + while (!idx.done()) { + .... // read element of a vector + idx.incr(); + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This task takes the given record definition files and compiles them into + java or c++ + files. It is then up to the user to compile the generated files. + +

The task requires the file or the nested fileset element to be + specified. Optional attributes are language (the output + language, default is "java"), + destdir (name of the destination directory for generated java/c++ + code, default is ".") and failonerror (error handling + behavior; default is true). +

    Usage

    +
    + <recordcc
    +       destdir="${basedir}/gensrc"
    +       language="java">
    +   <fileset include="**\/*.jr" />
    + </recordcc>
    + 
    ]]> +
    +
    + +
    + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi as a comma separated string in + conf as a property attr + + The String starts with the user name followed by the default group names, + and other group names. + + @param conf configuration + @param attr property name + @param ugi a UnixUserGroupInformation]]> + + + + + + + + conf + + The object is expected to store with the property name attr + as a comma separated string that starts + with the user name followed by group names. + If the property name is not defined, return null. + It's assumed that there is only one UGI per user. If this user already + has a UGI in the ugi map, return the ugi in the map. + Otherwise, construct a UGI from the configuration, store it in the + ugi map and return it. + + @param conf configuration + @param attr property name + @return a UnixUGI + @throws LoginException if the stored string is ill-formatted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This tool supports archiving and anaylzing (sort/grep) of log-files. + It takes as input + a) Input uri which will serve uris of the logs to be archived. + b) Output directory (not mandatory). + b) Directory on dfs to archive the logs. + c) The sort/grep patterns for analyzing the files and separator for boundaries. + Usage: + Logalyzer -archive -archiveDir -analysis -logs -grep -sort -separator +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GenericOptionsParser to parse only the generic Hadoop + arguments. + + The array of string arguments other than the generic arguments can be + obtained by {@link #getRemainingArgs()}. + + @param conf the Configuration to modify. + @param args command-line arguments.]]> + + + + + GenericOptionsParser to parse given options as well + as generic Hadoop options. + + The resulting CommandLine object can be obtained by + {@link #getCommandLine()}. + + @param conf the configuration to modify + @param options options built by the caller + @param args User-specified arguments]]> + + + + + Strings containing the un-parsed arguments + or empty array if commandLine was not defined.]]> + + + + + CommandLine object + to process the parsed arguments. + + Note: If the object is created with + {@link #GenericOptionsParser(Configuration, String[])}, then returned + object will only contain parsed generic options. + + @return CommandLine representing list of arguments + parsed against Options descriptor.]]> + + + + + + + + + + GenericOptionsParser is a utility to parse command line + arguments generic to the Hadoop framework. + + GenericOptionsParser recognizes several standarad command + line arguments, enabling applications to easily specify a namenode, a + jobtracker, additional configuration resources etc. + +

    Generic Options

    + +

    The supported generic options are:

    +

    +     -conf <configuration file>     specify a configuration file
    +     -D <property=value>            use value for given property
    +     -fs <local|namenode:port>      specify a namenode
    +     -jt <local|jobtracker:port>    specify a job tracker
    +     -files <comma separated list of files>    specify comma separated
    +                            files to be copied to the map reduce cluster
    +     -libjars <comma separated list of jars>   specify comma separated
    +                            jar files to include in the classpath.
    +     -archives <comma separated list of archives>    specify comma
    +             separated archives to be unarchived on the compute machines.
    +
    + 

    + +

    The general command line syntax is:

    +

    + bin/hadoop command [genericOptions] [commandOptions]
    + 

    + +

    Generic command line arguments might modify + Configuration objects, given to constructors.

    + +

    The functionality is implemented using Commons CLI.

    + +

    Examples:

    +

    + $ bin/hadoop dfs -fs darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    + 
    + $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    +     
    + $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
    + list /data directory in dfs with conf specified in hadoop-site.xml
    +     
    + $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt local -submit job.xml
    + submit a job to local runner
    + 
    + $ bin/hadoop jar -libjars testlib.jar 
    + -archives test.tgz -files file.txt inputjar args
    + job submission with libjars, files and archives
    + 

    + + @see Tool + @see ToolRunner]]> +
    +
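As a rough, hedged sketch of using the parser directly (outside ToolRunner): the class name MyTool and the argument handling below are illustrative assumptions, not part of the original javadoc.
+
+     // Sketch only: org.apache.hadoop.conf.Configuration and
+     // org.apache.hadoop.util.GenericOptionsParser are assumed on the classpath.
+     public class MyTool {
+       public static void main(String[] args) throws Exception {
+         Configuration conf = new Configuration();
+         // Consumes -D, -fs, -jt, -files, -libjars and -archives and applies them to conf
+         GenericOptionsParser parser = new GenericOptionsParser(conf, args);
+         String[] appArgs = parser.getRemainingArgs();
+         // appArgs now holds only the application-specific arguments
+       }
+     }
+ 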
    + + + + + + + + + Class<T>) of the + argument of type T. + @param The type of the argument + @param t the object to get it class + @return Class<T>]]> + + + + + + + List<T> to a an array of + T[]. + @param c the Class object of the items in the list + @param list the list to convert]]> + + + + + + List<T> to a an array of + T[]. + @param list the list to convert + @throws ArrayIndexOutOfBoundsException if the list is empty. + Use {@link #toArray(Class, List)} if the list may be empty.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-hadoop is loaded, + else false]]> + + + + + + true if native hadoop libraries, if present, can be + used for this job; false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { pq.top().change(); pq.adjustTop(); } + instead of
    +  { o = pq.pop(); o.change(); pq.push(o); }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take an insignificant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occured and time-out the operation.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop Pipes + or Hadoop Streaming. + + It also checks to ensure that we are running on a *nix platform else + (e.g. in Cygwin/Windows) it returns null. + @param job job configuration + @return a String[] with the ulimit command arguments or + null if we are running on a non *nix platform or + if the limit is unspecified.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell can be used to run unix commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShellCommandExecutorshould be used in cases where the output + of the command needs no explicit parsing and where the command, working + directory and the environment remains unchanged. The output of the command + is stored as-is and is expected to be small.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ArrayList of string values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the char to be escaped + @return an escaped string]]> + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the escaped char + @return an unescaped string]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyApp.MyMapper.class);
    +         job.setReducerClass(MyApp.MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
+         JobClient.runJob(job);
+         return 0;
+       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
+         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> +
    +
    + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
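A hedged sketch of the two run overloads described above; MyApp is the Tool from the earlier example and the surrounding method is assumed:
+
+     // Sketch only: fragment assumed to sit inside a method that declares
+     // String[] args and throws Exception.
+     Configuration conf = new Configuration();
+     MyApp tool = new MyApp();
+
+     // Generic options in args are applied to conf before MyApp.run(...) runs
+     int rc1 = ToolRunner.run(conf, tool, args);
+
+     // Equivalent to run(tool.getConf(), tool, args)
+     int rc2 = ToolRunner.run(tool, args);
+ 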
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + diff --git a/aarch64/share/hadoop/common/jdiff/hadoop_0.18.2.xml b/aarch64/share/hadoop/common/jdiff/hadoop_0.18.2.xml new file mode 100644 index 0000000..08173ab --- /dev/null +++ b/aarch64/share/hadoop/common/jdiff/hadoop_0.18.2.xml @@ -0,0 +1,38788 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + name property, null if + no such property exists. + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + name property, without doing + variable expansion. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + + value of the name property. + + @param name property name. + @param value property value.]]> + + + + + + + name property. If no such property + exists, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, or if the specified value is not a valid + int, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as an int, + or defaultValue.]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property is specified, or if the specified value is not a valid + long, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property is specified, or if the specified value is not a valid + float, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a float, + or defaultValue.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

    + This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Hadoop by default specifies two resources, loaded in-order from the + classpath:

      +
1. hadoop-default.xml: Read-only defaults for hadoop.
2. hadoop-site.xml: Site-specific configuration for a given hadoop installation.
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    +  <property>
    +    <name>dfs.client.buffer.dir</name>
    +    <value>/tmp/hadoop/dfs/client</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + hadoop-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
1. Other properties defined in this Configuration; and, if a name is undefined here,
2. Properties in {@link System#getProperties()}.
    + +

    For example, if a configuration resource contains the following property + definitions: +

    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    + + When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name.]]> +
    +
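A small hedged sketch of the behaviour described above from application code; the extra resource name and the buffer-size lookup are illustrative assumptions, while basedir/tempdir mirror the example resource:
+
+     // Sketch only: property values are illustrative.
+     Configuration conf = new Configuration();           // hadoop-default.xml, then hadoop-site.xml
+     conf.addResource("my-app-site.xml");                 // additional resource, loaded afterwards
+
+     conf.set("basedir", "/user/someuser");
+     conf.set("tempdir", "${basedir}/tmp");
+     String tmp = conf.get("tempdir");                    // expands to "/user/someuser/tmp"
+
+     int bufferSize = conf.getInt("io.file.buffer.size", 4096);   // typed accessor with a default
+ 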
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DistributedCache is a facility provided by the Map-Reduce + framework to cache files (text, archives, jars etc.) needed by applications. +

    + +

    Applications specify the files, via urls (hdfs:// or http://) to be cached + via the {@link JobConf}. The DistributedCache assumes that the + files specified via hdfs:// urls are already present on the + {@link FileSystem} at the path specified by the url.

    + +

    The framework will copy the necessary files on to the slave node before + any tasks for the job are executed on that node. Its efficiency stems from + the fact that the files are only copied once per job and the ability to + cache archives which are un-archived on the slaves.

    + +

    DistributedCache can be used to distribute simple, read-only + data/text files and/or more complex types such as archives, jars etc. + Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. + Jars may be optionally added to the classpath of the tasks, a rudimentary + software distribution mechanism. Files have execution permissions. + Optionally users can also direct it to symlink the distributed cache file(s) + into the working directory of the task.

    + +

    DistributedCache tracks modification timestamps of the cache + files. Clearly the cache files should not be modified by the application + or externally while the job is executing.

    + +

    Here is an illustrative example on how to use the + DistributedCache:

    +

    +     // Setting up the cache for the application
    +     
    +     1. Copy the requisite files to the FileSystem:
    +     
    +     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
    +     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
    +     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
    +     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
    +     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
    +     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
    +     
    +     2. Setup the application's JobConf:
    +     
    +     JobConf job = new JobConf();
    +     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
    +                                   job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/map.zip", job);
    +     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar", job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz", job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz", job);
    +     
    +     3. Use the cached files in the {@link Mapper} or {@link Reducer}:
    +     
    +     public static class MapClass extends MapReduceBase  
    +     implements Mapper<K, V, K, V> {
    +     
    +       private Path[] localArchives;
    +       private Path[] localFiles;
    +       
    +       public void configure(JobConf job) {
    +         // Get the cached archives/files
    +         localArchives = DistributedCache.getLocalCacheArchives(job);
    +         localFiles = DistributedCache.getLocalCacheFiles(job);
    +       }
    +       
    +       public void map(K key, V value, 
    +                       OutputCollector<K, V> output, Reporter reporter) 
    +       throws IOException {
    +         // Use data from the cached archives/files here
    +         // ...
    +         // ...
    +         output.collect(k, v);
    +       }
    +     }
    +     
    + 

    + + @see JobConf + @see JobClient]]> +
    +
    + +
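The symlink option mentioned above has to be switched on explicitly; a minimal hedged sketch (job is the JobConf from step 2 of the example):
+
+     // Sketch only: symlink cached files into the task's working directory;
+     // the URI fragment (e.g. "#lookup.dat") names the link.
+     DistributedCache.createSymlink(job);
+ 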
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BufferedFSInputStream + with the specified buffer size, + and saves its argument, the input stream + in, for later use. An internal + buffer array of length size + is created and stored in buf. + + @param in the underlying input stream. + @param size the buffer size. + @exception IllegalArgumentException if size <= 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ']]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    +
    +
    +

    +

    ? +
    Matches any single character. + +

    +

    * +
    Matches zero or more characters. + +

    +

    [abc] +
    Matches a single character from character set + {a,b,c}. + +

    +

    [a-b] +
    Matches a single character from the character range + {a...b}. Note that character a must be + lexicographically less than or equal to character b. + +

    +

    [^a] +
    Matches a single character that is not from character set or range + {a}. Note that the ^ character must occur + immediately to the right of the opening bracket. + +

    +

    \c +
    Removes (escapes) any special meaning of character c. + +

    +

    {ab,cd} +
    Matches a string from the string set {ab, cd} + +

    +

    {ab,c{de,fh}} +
    Matches a string from the string set {ab, cde, cfh} + +
    +
    +
+ + @param pathPattern a regular expression specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object. The + Hadoop DFS is a multi-machine system that appears as a single + disk. It's useful because of its fault tolerance and potentially + very large capacity. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is {@link DistributedFileSystem}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + buf at offset + and checksum into checksum. + The method is used for implementing read, therefore, it should be optimized + for sequential reading + @param pos chunkPos + @param buf desitination buffer + @param offset offset in buf at which to store data + @param len maximun number of bytes to read + @return number of bytes read]]> + + + + + + + + + + + + + + + + + -1 if the end of the + stream is reached. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + This method implements the general contract of the corresponding + {@link InputStream#read(byte[], int, int) read} method of + the {@link InputStream} class. As an additional + convenience, it attempts to read as many bytes as possible by repeatedly + invoking the read method of the underlying stream. This + iterated read continues until one of the following + conditions becomes true:

      + +
    • The specified number of bytes have been read, + +
    • The read method of the underlying stream returns + -1, indicating end-of-file. + +
    If the first read on the underlying stream returns + -1 to indicate end-of-file then this method returns + -1. Otherwise this method returns the number of bytes + actually read. + + @param b destination buffer. + @param off offset at which to start storing bytes. + @param len maximum number of bytes to read. + @return the number of bytes read, or -1 if the end of + the stream has been reached. + @exception IOException if an I/O error occurs. + ChecksumException if any checksum error occurs]]> +
    + + + + + + + + + + + + n bytes of data from the + input stream. + +

    This method may skip more bytes than are remaining in the backing + file. This produces no exception and the number of bytes skipped + may include some number of bytes that were beyond the EOF of the + backing file. Attempting to read from the stream after skipping past + the end will result in -1 indicating the end of the file. + +

    If n is negative, no bytes are skipped. + + @param n the number of bytes to be skipped. + @return the actual number of bytes skipped. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to skip to is corrupted]]> + + + + + + + This method may seek past the end of the file. + This produces no exception and an attempt to read from + the stream will result in -1 indicating the end of the file. + + @param pos the postion to seek to. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to seek to is corrupted]]> + + + + + + + + + + len bytes from + stm + + @param stm an input stream + @param buf destiniation buffer + @param offset offset at which to store data + @param len number of bytes to read + @return actual number of bytes read + @throws IOException if there is any IO error]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + len bytes from the specified byte array + starting at offset off and generate a checksum for + each data chunk. + +

    This method stores bytes from the given array into this + stream's buffer before it gets checksumed. The buffer gets checksumed + and flushed to the underlying output stream when all data + in a checksum chunk are in the buffer. If the buffer is empty and + requested length is at least as large as the size of next checksum chunk + size, this method will checksum and write the chunk directly + to the underlying output stream. Thus it avoids uneccessary data copy. + + @param b the data. + @param off the start offset in the data. + @param len the number of bytes to write. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trash feature. Files are moved to a user's trash + directory, a subdirectory of their home directory named ".Trash". Files are + initially moved to a current sub-directory of the trash directory. + Within that sub-directory their original path is preserved. Periodically + one may checkpoint the current trash and remove older checkpoints. (This + design permits trash management without enumeration of the full trash + content, without date support in the filesystem, and without clock + synchronization.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a tool for migrating data from an older to a newer version + of an S3 filesystem. +

    +

    + All files in the filesystem are migrated by re-writing the block metadata + - no datafiles are touched. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + Extracts AWS credentials from the filesystem URI or configuration. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A block-based {@link FileSystem} backed by + Amazon S3. +

    + @see NativeS3FileSystem]]> +
    +
    + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If f is a file, this method will make a single call to S3. + If f is a directory, this method will make a maximum of + (n / 1000) + 2 calls to S3, where n is the total number of + files and directories contained directly in f. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} for reading and writing files stored on + Amazon S3. + Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation + stores files on S3 in their + native form so they can be read by other S3 tools. +

    + @see org.apache.hadoop.fs.s3.S3FileSystem]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nth value.]]> + + + + + + + + + + + + + + + + + + + + + nth value in the file.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataInputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + DataInputBuffer buffer = new DataInputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using DataInput methods ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataOutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + DataOutputBuffer buffer = new DataOutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using DataOutput methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

    + Compared with ObjectWritable, this class is much more effective, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines + the classes that will be wrapped in GenericObject in the application. + Note: the classes defined in getTypes() must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
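A short hedged sketch of using such a wrapper at runtime; GenericObject and ClassType1 are the illustrative names from the snippet above:
+
+     // Sketch only: wrap, ship and unwrap a value with the subclass above.
+     GenericObject wrapper = new GenericObject();
+     wrapper.set(new ClassType1());             // must be one of the types returned by getTypes()
+     // ... serialize with wrapper.write(out), deserialize with wrapper.readFields(in) ...
+     Writable unwrapped = wrapper.get();        // the wrapped ClassType1 instance
+ 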
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new InputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + InputBuffer buffer = new InputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using InputStream methods ...
    + }
    + 
    + @see DataInputBuffer + @see DataOutput]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link IOException} or + null pointers. Must only be used for cleanup in exception handlers. + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key and + val. Returns true if such a pair exists and false when at + the end of the map]]> + + + + + + + + + + + + + + + + key or if it does not exist, at the first entry + after the named key. + +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof.]]> + + + + + + + + + key does not exist, return + the first entry that falls just before the key. Otherwise, + return the record that sorts just after. + @return - the key that was the closest match or null if eof.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new OutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + OutputBuffer buffer = new OutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using OutputStream methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    + @see DataOutputBuffer + @see InputBuffer]]> +
    +
    + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link Writer}, {@link Reader} and + {@link Sorter} classes for writing, reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
    1. + Writer : Uncompressed records. +
    2. +
    3. + RecordCompressWriter : Record-compressed files, only compress + values. +
    4. +
    5. + BlockCompressWriter : Block-compressed files, both keys & + values are collected in 'blocks' + separately and compressed. The size of + the 'block' is configurable. +
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

    The recommended way is to use the static createWriter methods + provided by the SequenceFile to chose the preferred format.

    + +

    The {@link Reader} acts as the bridge and can read any of the above + SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
    • + version - 3 bytes of magic header SEQ, followed by 1 byte of actual + version number (e.g. SEQ4 or SEQ6) +
    • +
    • + keyClassName -key class +
    • +
    • + valueClassName - value class +
    • +
    • + compression - A boolean which specifies if compression is turned on for + keys/values in this file. +
    • +
    • + blockCompression - A boolean which specifies if block-compression is + turned on for keys/values in this file. +
    • +
    • + compression codec - CompressionCodec class which is used for + compression of keys and/or values (if compression is + enabled). +
    • +
    • + metadata - {@link Metadata} for this file. +
    • +
    • + sync - A sync marker to denote end of the header. +
    • +
    + +
    Uncompressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Value
      • +
      +
    • +
    • + A sync-marker every few 100 bytes or so. +
    • +
    + +
    Record-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Compressed Value
      • +
      +
    • +
    • + A sync-marker every few 100 bytes or so. +
    • +
    + +
    Block-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record Block +
        +
      • Compressed key-lengths block-size
      • +
      • Compressed key-lengths block
      • +
      • Compressed keys block-size
      • +
      • Compressed keys block
      • +
      • Compressed value-lengths block-size
      • +
      • Compressed value-lengths block
      • +
      • Compressed values block-size
      • +
      • Compressed values block
      • +
      +
    • +
    • + A sync-marker every few 100 bytes or so. +
    • +
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
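A hedged sketch of the write/read round trip with the createWriter factory mentioned above; the path, the IntWritable/Text pair and the in-scope fs/conf variables are illustrative assumptions:
+
+     // Sketch only: write a block-compressed file, then read it back.
+     Path file = new Path("/tmp/example.seq");
+     SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file,
+         IntWritable.class, Text.class, SequenceFile.CompressionType.BLOCK);
+     writer.append(new IntWritable(1), new Text("one"));
+     writer.close();
+
+     SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
+     IntWritable key = new IntWritable();
+     Text value = new Text();
+     while (reader.next(key, value)) {
+       // consume key/value here
+     }
+     reader.close();
+ 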
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key, skipping its + value. True if another entry exists, and false at end of file.]]> + + + + + + + + key and + val. Returns true if such a pair exists and false when at + end of file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The position passed must be a position returned by {@link + SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + position, use {@link SequenceFile.Reader#sync(long)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SegmentDescriptor + @param segments the list of SegmentDescriptors + @param tmpDir the directory to write temporary files into + @return RawKeyValueIterator + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For best performance, applications should make sure that the {@link + Writable#readFields(DataInput)} implementation of their keys is + very efficient. In particular, it should avoid allocating memory.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This always returns a synchronized position. In other words, + immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + the key may be earlier in the file than key last written when this + method was called (e.g., with block-compression, it may be the first key + in the block that was being written when this method was called).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key. Returns + true if such a key exists and false when at the end of the set.]]> + + + + + + + key. + Returns key, or null if no match exists.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instatiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. 
+ @return byte position of the first occurence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a UTF8 with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + Also includes utilities for efficiently reading and writing UTF-8. + + @deprecated replaced by Text]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

+ + @param in DataInput to deserialize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +

    +     public class MyWritable implements Writable {
    +       // Some data     
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 

    ]]> +
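+ A round-trip of the MyWritable example above might look like the following sketch
+ (illustrative only; assumes java.io imports and the MyWritable class as written):
+
+     // Serialize a MyWritable into a byte buffer ...
+     ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+     MyWritable before = new MyWritable();
+     before.write(new DataOutputStream(buffer));
+
+     // ... and read it back through the static read(DataInput) helper.
+     DataInput in = new DataInputStream(new ByteArrayInputStream(buffer.toByteArray()));
+     MyWritable after = MyWritable.read(in);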
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Example:

    +

    +     public class MyWritableComparable implements WritableComparable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable w) {
+         int thisValue = this.counter;
+         int thatValue = w.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +     }
    + 

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implemenation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. + @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-lzo library is loaded & initialized; + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lzo compression/decompression pair. 
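+ The byte-level compare(byte[],int,int,byte[],int,int) optimization mentioned above
+ might look like this sketch (hypothetical comparator for the MyWritableComparable
+ example earlier, which serializes its int counter first):
+
+     public class MyComparator extends WritableComparator {
+       public MyComparator() {
+         super(MyWritableComparable.class);
+       }
+
+       public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+         // The first four bytes of each serialized key hold the counter field.
+         int thisCounter = readInt(b1, s1);
+         int thatCounter = readInt(b2, s2);
+         return (thisCounter < thatCounter ? -1 : (thisCounter == thatCounter ? 0 : 1));
+       }
+     }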
+ http://www.oberhumer.com/opensource/lzo/]]> + + + + + + + + + + + + + + + + + + + + + true if lzo compressors are loaded & initialized, + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if lzo decompressors are loaded & initialized, + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-zlib is loaded & initialized + and can be loaded for this job, else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Keep trying a limited number of times, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    + + + + + + + + Keep trying for a maximum time, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
+ + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime multiplied by the number of tries so far. +

    ]]> +
    +
+ + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime multiplied by a random + number in the range of [0, 2^(number of retries)) +

    ]]> +
    +
    + + + + + + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + + + A retry policy for RemoteException + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + Try once, and fail by re-throwing the exception. + This corresponds to having no retry mechanism in place. +

    ]]> +
    +
    + + + + Try once, and fail silently for void methods, or by + re-throwing the exception for non-void methods. +

    ]]> +
    +
    + + + + Keep trying forever. +

    ]]> +
    +
    + + + A collection of useful implementations of {@link RetryPolicy}. +

    ]]> +
    +
    + + + + + + + + + + Determines whether the framework should retry a + method for the given exception, and the number + of retries that have been made for that operation + so far. +

    + @param e The exception that caused the method to fail. + @param retries The number of times the method has been retried. + @return true if the method should be retried, + false if the method should not be retried + but shouldn't fail with an exception (only for void methods). + @throws Exception The re-thrown exception e indicating + that the method failed and should not be retried further.]]> +
    +
    + + + Specifies a policy for retrying method failures. + Implementations of this interface should be immutable. +

    ]]> +
    +
    + + + + + + + + + + + + Create a proxy for an interface of an implementation class + using the same retry policy for each method in the interface. +

+ @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param retryPolicy the policy for retrying method call failures + @return the retry proxy]]> +
    +
+ + + + + + + Create a proxy for an interface of an implementation class + using a set of retry policies specified by method name. + If no retry policy is defined for a method then a default of + {@link RetryPolicies#TRY_ONCE_THEN_FAIL} is used. +

    + @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param methodNameToPolicyMap a map of method names to retry policies + @return the retry proxy]]> +
    +
    + + + A factory for creating retry proxies. +

    ]]> +
    +
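+ As a sketch only (assuming the retry API described above; ClientProtocol and
+ ClientProtocolImpl are hypothetical), a per-method retry proxy might be wired up as:
+
+     // Retry a flaky method a few times with a fixed sleep; everything else fails fast.
+     RetryPolicy retryWithSleep =
+         RetryPolicies.retryUpToMaximumCountWithFixedSleep(4, 10, TimeUnit.SECONDS);
+
+     Map<String, RetryPolicy> methodPolicies = new HashMap<String, RetryPolicy>();
+     methodPolicies.put("rename", retryWithSleep);   // unlisted methods: TRY_ONCE_THEN_FAIL
+
+     ClientProtocol client = (ClientProtocol) RetryProxy.create(
+         ClientProtocol.class, new ClientProtocolImpl(), methodPolicies);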
    + +
    + + + + + + + + Prepare the deserializer for reading.

    ]]> +
    +
    + + + + + + Deserialize the next object from the underlying input stream. + If the object t is non-null then this deserializer + may set its internal state to the next object read from the input + stream. Otherwise, if the object t is null a new + deserialized object will be created. +

    + @return the deserialized object]]> +
    +
    + + + + Close the underlying input stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for deserializing objects of type from an + {@link InputStream}. +

    + +

    + Deserializers are stateful, but must not buffer the input since + other producers may read from the input between calls to + {@link #deserialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link Deserializer} to deserialize + the objects to be compared so that the standard {@link Comparator} can + be used to compare them. +

    +

    + One may optimize compare-intensive operations by using a custom + implementation of {@link RawComparator} that operates directly + on byte representations. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + Encapsulates a {@link Serializer}/{@link Deserializer} pair. +

    + @param ]]> +
    +
    + + + + + + + Serializations are found by reading the io.serializations + property from conf, which is a comma-delimited list of + classnames. +

    ]]> +
    +
    + + + + + + + + + + + + A factory for {@link Serialization}s. +

    ]]> +
    +
    + + + + + + + + Prepare the serializer for writing.

    ]]> +
    +
    + + + + + Serialize t to the underlying output stream.

    ]]> +
    +
    + + + + Close the underlying output stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for serializing objects of type to an + {@link OutputStream}. +

    + +

    + Serializers are stateful, but must not buffer the output since + other producers may write to the output between calls to + {@link #serialize(Object)}. +

    + @param ]]> +
    +
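+ A sketch of driving the factory described above (Text is used purely as an example
+ key type; the default io.serializations setting registers WritableSerialization for it):
+
+     Configuration conf = new Configuration();
+     SerializationFactory factory = new SerializationFactory(conf);
+
+     Serializer<Text> serializer = factory.getSerializer(Text.class);
+     ByteArrayOutputStream out = new ByteArrayOutputStream();
+     serializer.open(out);
+     serializer.serialize(new Text("hello"));
+     serializer.close();
+
+     Deserializer<Text> deserializer = factory.getDeserializer(Text.class);
+     deserializer.open(new ByteArrayInputStream(out.toByteArray()));
+     Text copy = deserializer.deserialize(null);   // null: create a fresh instance
+     deserializer.close();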
    + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + param, to the IPC server running at + address, returning the value. Throws exceptions if there are + network problems or if the remote code threw an exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unwraps any IOException. + + @param lookupTypes the desired exception class. + @return IOException, which is either the lookupClass exception or this.]]> + + + + + This unwraps any Throwable that has a constructor taking + a String as a parameter. + Otherwise it returns this. + + @return Throwable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + protocol is a Java interface. All parameters and return types must + be one of: + +
    • a primitive type, boolean, byte, + char, short, int, long, + float, double, or void; or
    • + +
    • a {@link String}; or
    • + +
    • a {@link Writable}; or
    • + +
    • an array of the above types
    + + All methods in the protocol should throw only IOException. No field data of + the protocol instance is transmitted.]]> +
    +
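+ A sketch of such a protocol interface (hypothetical; shown only to illustrate the
+ parameter and return-type rules above, with the usual VersionedProtocol parent):
+
+     public interface MirrorProtocol extends VersionedProtocol {
+       long versionID = 1L;
+
+       // Primitives, Strings, Writables and arrays of them are all permitted.
+       Text echo(Text message) throws IOException;
+       int add(int a, int b) throws IOException;
+       String[] tail(String path, int lines) throws IOException;
+     }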
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + handlerCount determines + the number of handler threads that will be used to process calls.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #rpcQueueTime}.inc(time)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        rpc.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker, + as {@link JobTracker.State} + + @return the current state of the JobTracker.]]> + + + + + + + + + + + + ClusterStatus provides clients with information such as: +

      +
    1. + Size of the cluster. +
    2. +
    3. + Task capacity of the cluster. +
    4. +
    5. + The number of currently running map & reduce tasks. +
    6. +
    7. + State of the JobTracker. +
    8. +

    + +

    Clients can query for the latest ClusterStatus, via + {@link JobClient#getClusterStatus()}.

    + + @see JobClient]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter can be of + any {@link Enum} type.

    + +

    Counters are bunched into {@link Group}s, each comprising of + counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Group of counters, comprising of counters from a particular + counter {@link Enum} class. + +

    Grouphandles localization of the class name and the + counter names.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param fs the file system that the file is on + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobConf, int)}. + Subclasses of FileInputFormat can also override the + {@link #isSplitable(FileSystem, Path)} method to ensure input-files are + not split-up and are processed as a whole by {@link Mapper}s.]]> + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapred.output.dir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapred.output.dir}/_temporary/_${taskid} (only) + are promoted to ${mapred.output.dir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

    The application-writer can take advantage of this by creating any + side-files required in ${mapred.work.output.dir} during execution + of his reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the + framework will move them out similarly - thus she doesn't have to pick + unique paths per task-attempt.

    + +

    Note: the value of ${mapred.work.output.dir} during + execution of a particular task-attempt is actually + ${mapred.output.dir}/_temporary/_{$taskid}, and this value is + set by the map-reduce framework. So, just create any side-files in the + path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce + task to take advantage of this feature.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
    +
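+ A sketch of creating such a side-file from inside a task (old mapred API assumed;
+ the file name is illustrative):
+
+     public void configure(JobConf job) {
+       try {
+         // ${mapred.work.output.dir} for this task-attempt; promoted on success.
+         Path workDir = FileOutputFormat.getWorkOutputPath(job);
+         FileSystem fs = workDir.getFileSystem(job);
+         FSDataOutputStream side = fs.create(new Path(workDir, "side-file.txt"));
+         side.writeUTF("side-effect output");
+         side.close();
+       } catch (IOException ioe) {
+         throw new RuntimeException(ioe);
+       }
+     }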
    + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is used to validate the input directories when a job is + submitted so that the {@link JobClient} can fail early, with an useful + error message, in case of errors. For e.g. input directory does not exist. +

    + + @param job job configuration. + @throws InvalidInputException if the job does not have valid input + @deprecated getSplits is called in the client and can perform any + necessary validation of the input]]> +
    +
    + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

    Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For e.g. a split could + be <input-file-path, start, offset> tuple. + + @param job job configuration. + @param numSplits the desired number of splits, a hint. + @return an array of {@link InputSplit}s for the job.]]> + + + + + + + + + It is the responsibility of the RecordReader to respect + record boundaries while processing the logical split to present a + record-oriented view to the individual task.

    + + @param split the {@link InputSplit} + @param job the job that this split belongs to + @return a {@link RecordReader}]]> +
    +
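+ As a sketch only, the isSplitable override mentioned in the FileInputFormat notes
+ above (hypothetical subclass; forces each input file to be read whole by one Mapper):
+
+     public class WholeFileTextInputFormat extends TextInputFormat {
+       protected boolean isSplitable(FileSystem fs, Path file) {
+         return false;   // one InputSplit, and hence one map task, per file
+       }
+     }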
    + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
    1. + Validate the input-specification of the job. +
    2. + Split-up the input file(s) into logical {@link InputSplit}s, each of + which is then assigned to an individual {@link Mapper}. +
    3. +
    4. + Provide the {@link RecordReader} implementation to be used to glean + input records from the logical InputSplit for processing by + the {@link Mapper}. +
    5. +
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

Clearly, logical splits based on input-size are insufficient for many + applications since record boundaries are to be respected. In such cases, the + application has to also implement a {@link RecordReader} on whom lies the + responsibility to respect record-boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see JobClient + @see FileInputFormat]]> + + + + + + + + + + InputSplit. + + @return the number of bytes in the input split. + @throws IOException]]> + + + + + + InputSplit is + located as an array of Strings. + @throws IOException]]> + + + + InputSplit represents the data to be processed by an + individual {@link Mapper}. + +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + jobid doesn't correspond to any known job. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient is the primary interface for the user-job to interact + with the {@link JobTracker}. + + JobClient provides facilities to submit jobs, track their + progress, access component-tasks' reports/logs, get the Map-Reduce cluster + status information etc. + +

    The job submission process involves: +

      +
    1. + Checking the input and output specifications of the job. +
    2. +
    3. + Computing the {@link InputSplit}s for the job. +
    4. +
    5. + Setup the requisite accounting information for the {@link DistributedCache} + of the job, if necessary. +
    6. +
    7. + Copying the job's jar and configuration to the map-reduce system directory + on the distributed file-system. +
    8. +
    9. + Submitting the job to the JobTracker and optionally monitoring + it's status. +
    10. +

    + + Normally the user creates the application, describes various facets of the + job via {@link JobConf} and then uses the JobClient to submit + the job and monitor its progress. + +

    Here is an example on how to use JobClient:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     job.setInputPath(new Path("in"));
    +     job.setOutputPath(new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +
    +     // Submit the job, then poll for progress until the job is complete
    +     JobClient.runJob(job);
    + 

    + +

    Job Control

    + +

    At times clients would chain map-reduce jobs to accomplish complex tasks + which cannot be done via a single map-reduce job. This is fairly easy since + the output of the job, typically, goes to distributed file-system and that + can be used as the input for the next job.

    + +

    However, this also means that the onus on ensuring jobs are complete + (success/failure) lies squarely on the clients. In such situations the + various job-control options are: +

      +
    1. + {@link #runJob(JobConf)} : submits the job and returns only after + the job has completed. +
    2. +
    3. + {@link #submitJob(JobConf)} : only submits the job, then poll the + returned handle to the {@link RunningJob} to query status and make + scheduling decisions. +
    4. +
    5. + {@link JobConf#setJobEndNotificationURI(String)} : setup a notification + on job-completion, thus avoiding polling. +
    6. +

    + + @see JobConf + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if framework should keep the intermediate files + for failed tasks, false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: +

    + @param dir the {@link Path} of the output directory for the map-reduce job.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the outputs of the maps are to be compressed, + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This comparator should be provided if the equivalence rules for keys + for sorting the intermediates are different from those for grouping keys + before each call to + {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.

    + +

    For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed + in a single call to the reduce function if K1 and K2 compare as equal.

    + +

    Since {@link #setOutputKeyComparatorClass(Class)} can be used to control + how keys are sorted, this can be used in conjunction to simulate + secondary sort on values.

    + +

    Note: This is not a guarantee of the reduce sort being + stable in any sense. (In any case, with the order of available + map-outputs to the reduce being non-deterministic, it wouldn't make + that much sense.)

    + + @param theClass the comparator class to be used for grouping keys. + It should implement RawComparator. + @see #setOutputKeyComparatorClass(Class)]]> +
    +
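+ A sketch of combining the two comparator settings for a secondary sort (both
+ comparator classes are hypothetical):
+
+     JobConf job = new JobConf(new Configuration(), MyJob.class);
+     // Sort map outputs by the full composite key ...
+     job.setOutputKeyComparatorClass(CompositeKeyComparator.class);
+     // ... but group values at the reducer by the natural part of the key only.
+     job.setOutputValueGroupingComparator(NaturalKeyGroupingComparator.class);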
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. Typically the combiner is same as the + the {@link Reducer} for the job i.e. {@link #getReducerClass()}. + + @return the user-defined combiner class used to combine map-outputs.]]> + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. + +

    The combiner is a task-level aggregation operation which, in some cases, + helps to cut down the amount of data transferred from the {@link Mapper} to + the {@link Reducer}, leading to better performance.

    + +

Typically the combiner is the same as the Reducer for the + job i.e. {@link #setReducerClass(Class)}.

    + + @param theClass the user-defined combiner class used to combine + map-outputs.]]> +
    +
    + + + + + + + + + + + true. + + @return true if speculative execution be used for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on, else false.]]> + + + + + true. + + @return true if speculative execution be + used for this job for map tasks, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for map tasks, + else false.]]> + + + + + true. + + @return true if speculative execution be used + for reduce tasks for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for reduce tasks, + else false.]]> + + + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + Note: This is only a hint to the framework. The actual + number of spawned map tasks depends on the number of {@link InputSplit}s + generated by the job's {@link InputFormat#getSplits(JobConf, int)}. + + A custom {@link InputFormat} is typically used to accurately control + the number of map tasks for the job.

    + +

    How many maps?

    + +

    The number of maps is usually driven by the total size of the inputs + i.e. total number of blocks of the input files.

    + +

The right level of parallelism for maps seems to be around 10-100 maps + per-node, although it has been set up to 300 or so for very cpu-light map + tasks. Task setup takes a while, so it is best if the maps take at least a + minute to execute.

    + +

    The default behavior of file-based {@link InputFormat}s is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of input files. However, the {@link FileSystem} blocksize of the + input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

    Thus, if you expect 10TB of input data and have a blocksize of 128MB, + you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is + used to set it even higher.

    + + @param n the number of map tasks for this job. + @see InputFormat#getSplits(JobConf, int) + @see FileInputFormat + @see FileSystem#getDefaultBlockSize() + @see FileStatus#getBlockSize()]]> +
    +
    + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + How many reduces? + +

    The right number of reduces seems to be 0.95 or + 1.75 multiplied by (<no. of nodes> * + + mapred.tasktracker.reduce.tasks.maximum). +

    + +

With 0.95 all of the reduces can launch immediately and + start transferring map outputs as the maps finish. With 1.75 + the faster nodes will finish their first round of reduces and launch a + second wave of reduces doing a much better job of load balancing.

    + +

    Increasing the number of reduces increases the framework overhead, but + increases load balancing and lowers the cost of failures.

    + +

    The scaling factors above are slightly less than whole numbers to + reserve a few reduce slots in the framework for speculative-tasks, failures + etc.

    + +

    Reducer NONE

    + +

    It is legal to set the number of reduce-tasks to zero.

    + +

    In this case the output of the map-tasks directly go to distributed + file-system, to the path set by + {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the + framework doesn't sort the map-outputs before writing it out to HDFS.

    + + @param n the number of reduce tasks for this job.]]> +
    +
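+ Working the 0.95 factor above through a small, purely hypothetical cluster:
+
+     int nodes = 20;
+     int reduceSlotsPerNode = 2;   // mapred.tasktracker.reduce.tasks.maximum
+     // 0.95 * 20 * 2 is about 38 reduces: all can start as soon as the maps finish.
+     job.setNumReduceTasks((int) Math.round(0.95 * nodes * reduceSlotsPerNode));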
    + + + mapred.map.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per map task.]]> + + + + + + + + + + + mapred.reduce.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per reduce task.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + noFailures, the + tasktracker is blacklisted for this job. + + @param noFailures maximum no. of failures of a given job per tasktracker.]]> + + + + + blacklisted for this job. + + @return the maximum no. of failures of a given job per tasktracker.]]> + + + + + failed. + + Defaults to zero, i.e. any failed map-task results in + the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + failed. + + Defaults to zero, i.e. any failed reduce-task results + in the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The debug script can aid debugging of failed map tasks. The script is + given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the map failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setMapDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param mDbgScript the script name]]> +
    +
    + + + + + + + + + The debug script can aid debugging of failed reduce tasks. The script + is given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the map failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script file needs to be symlinked

    + +

    Here is an example on how to submit a script +

    + job.setReduceDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param rDbgScript the script name]]> +
    +
    + + + + + + + + null if it hasn't + been set. + @see #setJobEndNotificationURI(String)]]> + + + + + + The uri can contain 2 special parameters: $jobId and + $jobStatus. Those, if present, are replaced by the job's + identifier and completion-status respectively.

    + +

    This is typically used by application-writers to implement chaining of + Map-Reduce jobs in an asynchronous manner.

    + + @param uri the job end notification uri + @see JobStatus + @see Job Completion and Chaining]]> +
    +
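+ For example (hypothetical host and path; $jobId and $jobStatus are substituted by
+ the framework as described above):
+
+     job.setJobEndNotificationURI(
+         "http://myworkflowhost:8080/notify?id=$jobId&status=$jobStatus");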
    + + + + When a job starts, a shared directory is created at location + + ${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ . + This directory is exposed to the users through + job.local.dir . + So, the tasks can use this space + as scratch space and share files among them.

+ This value is also available as a System property. + + @return The localized job specific shared directory]]> +
    +
    + + JobConf is the primary interface for a user to describe a + map-reduce job to the Hadoop framework for execution. The framework tries to + faithfully execute the job as-is described by JobConf, however: +
      +
    1. + Some configuration parameters might have been marked as + + final by administrators and hence cannot be altered. +
    2. +
While some job parameters are straight-forward to set + (e.g. {@link #setNumReduceTasks(int)}), other parameters interact subtly with the + rest of the framework and/or job-configuration and are relatively more + complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}). +
    4. +

    + +

    JobConf typically specifies the {@link Mapper}, combiner + (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and + {@link OutputFormat} implementations to be used etc. + +

Optionally JobConf is used to specify other advanced facets + of the job such as Comparators to be used, files to be put in + the {@link DistributedCache}, whether or not intermediate and/or job outputs + are to be compressed (and how), debuggability via user-provided scripts + ( {@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}), + for doing post-processing on task logs, task's stdout, stderr, syslog, etc.

    + +

    Here is an example on how to configure a job via JobConf:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     FileInputFormat.setInputPaths(job, new Path("in"));
    +     FileOutputFormat.setOutputPath(job, new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setCombinerClass(MyJob.MyReducer.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +     
    +     job.setInputFormat(SequenceFileInputFormat.class);
    +     job.setOutputFormat(SequenceFileOutputFormat.class);
    + 

    + + @see JobClient + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + any job + run on the jobtracker started at 200707121733, we would use : +
     
    + JobID.getTaskIDsPattern("200707121733", null);
    + 
    + which will return : +
     "job_200707121733_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @return a regex pattern matching JobIDs]]> +
    +
    + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID + @see JobTracker#getNewJobId() + @see JobTracker#getStartTime()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -archives + -files inputjar args]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + zero. + + @param conf configuration for the JobTracker. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.file.buffer.size specified in the given + Configuration. + @param in input stream + @param conf configuration + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Output pairs need not be of the same types as input pairs. A given + input pair may map to zero or many output pairs. Output pairs are + collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the input key. + @param value the input value. + @param output collects mapped keys and values. + @param reporter facility to report progress.]]> +
    + + + Maps are the individual tasks which transform input records into a + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link JobConf} for the + job via the {@link JobConfigurable#configure(JobConf)} and initialize + themselves. Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    The framework then calls + {@link #map(Object, Object, OutputCollector, Reporter)} + for each key/value pair in the InputSplit for that task.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the grouping by specifying + a Comparator via + {@link JobConf#setOutputKeyComparatorClass(Class)}.

    + +

    The grouped Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    The intermediate, grouped outputs are always stored in + {@link SequenceFile}s. Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the JobConf.

    + +

    If the job has + zero + reduces then the output of the Mapper is directly written + to the {@link FileSystem} without grouping by keys.

    + +

    Example:

    +

    +     public class MyMapper<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Mapper<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +       
    +       private String mapTaskId;
    +       private String inputFile;
    +       private int noRecords = 0;
    +       
    +       public void configure(JobConf job) {
    +         mapTaskId = job.get("mapred.task.id");
    +         inputFile = job.get("mapred.input.file");
    +       }
    +       
    +       public void map(K key, V val,
    +                       OutputCollector<K, V> output, Reporter reporter)
    +       throws IOException {
    +         // Process the <key, value> pair (assume this takes a while)
    +         // ...
    +         // ...
    +         
    +         // Let the framework know that we are alive, and kicking!
    +         // reporter.progress();
    +         
    +         // Process some more
    +         // ...
    +         // ...
    +         
    +         // Increment the no. of <key, value> pairs processed
    +         ++noRecords;
    +
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +        
    +         // Every 100 records update application-level status
    +         if ((noRecords%100) == 0) {
    +           reporter.setStatus(mapTaskId + " processed " + noRecords + 
    +                              " from input-file: " + inputFile); 
    +         }
    +         
    +         // Output the result
    +         output.collect(key, val);
    +       }
    +     }
    + 

    + +

    Applications may write a custom {@link MapRunnable} to exert greater + control on map processing e.g. multi-threaded Mappers etc.

    + + @see JobConf + @see InputFormat + @see Partitioner + @see Reducer + @see MapReduceBase + @see MapRunnable + @see SequenceFile]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + Provides default no-op implementations for a few methods, most non-trivial + applications need to override some of them.

    ]]> +
    +
    + + + + + + + + + + + <key, value> pairs. + +

    Mapping of input records to output records is complete when this method + returns.

+ + @param input the {@link RecordReader} to read the input records. + @param output the {@link OutputCollector} to collect the output records. + @param reporter {@link Reporter} to report progress, status-updates etc. + @throws IOException]]> +
    +
    + + Custom implementations of MapRunnable can exert greater + control on map processing e.g. multi-threaded, asynchronous mappers etc.

    + + @see Mapper]]> +
    +
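+ A minimal sketch of a MapRunnable (assuming the old mapred API above; this is
+ essentially what the stock single-threaded runner does, and a multi-threaded variant
+ would hand the pairs to a worker pool instead):
+
+     public class SimpleMapRunner<K1, V1, K2, V2> implements MapRunnable<K1, V1, K2, V2> {
+       private Mapper<K1, V1, K2, V2> mapper;
+
+       public void configure(JobConf job) {
+         mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(job.getMapperClass(), job);
+       }
+
+       public void run(RecordReader<K1, V1> input, OutputCollector<K2, V2> output,
+                       Reporter reporter) throws IOException {
+         K1 key = input.createKey();
+         V1 value = input.createValue();
+         while (input.next(key, value)) {
+           mapper.map(key, value, output, reporter);   // one call per input record
+         }
+         mapper.close();
+       }
+     }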
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nearly + equal content length.
    + Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} + to construct RecordReader's for MultiFileSplit's. + @see MultiFileSplit]]> +
    +
    + + + + + + + + + + + + + + + + + th Path]]> + + + + + + + + + + + th Path]]> + + + + + + + + + + + + + + + + + + + + + + + MultiFileSplit can be used to implement {@link RecordReader}'s, with + reading one record per file. + @see FileSplit + @see MultiFileInputFormat]]> + + + + + + + + + + + + + + + <key, value> pairs output by {@link Mapper}s + and {@link Reducer}s. + +

    OutputCollector is the generalization of the facility + provided by the Map-Reduce framework to collect data output by either the + Mapper or the Reducer i.e. intermediate outputs + or the output of the job.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when it is + a job is submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + @param ignored + @param job job configuration. + @throws IOException when output should not be attempted]]> +
    +
    + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
    1. + Validate the output-specification of the job. For e.g. check that the + output directory doesn't already exist. +
    2. + Provide the {@link RecordWriter} implementation to be used to write out + the output files of the job. Output files are stored in a + {@link FileSystem}. +
    3. +
    + + @see RecordWriter + @see JobConf]]> +
    +
    + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Typically a hash function on a all or a subset of the key.

+ + @param key the key to be partitioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + + @see Reducer]]> +
    +
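+ A sketch of a custom Partitioner (hypothetical class; routes records by the first
+ tab-separated field of the key so that related keys meet in the same reduce):
+
+     public class FirstFieldPartitioner implements Partitioner<Text, Text> {
+       public void configure(JobConf job) { }
+
+       public int getPartition(Text key, Text value, int numPartitions) {
+         String firstField = key.toString().split("\t", 2)[0];
+         // Mask the sign bit so the result always falls in [0, numPartitions).
+         return (firstField.hashCode() & Integer.MAX_VALUE) % numPartitions;
+       }
+     }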
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.0 to 1.0. + @throws IOException]]> + + + + RecordReader reads <key, value> pairs from an + {@link InputSplit}. + +

    RecordReader, typically, converts the byte-oriented view of + the input, provided by the InputSplit, and presents a + record-oriented view for the {@link Mapper} & {@link Reducer} tasks for + processing. It thus assumes the responsibility of processing record + boundaries and presenting the tasks with keys and values.

    + + @see InputSplit + @see InputFormat]]> +
    +
    + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param reporter facility to report progress. + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + Reduces values for a given key. + +

    The framework calls this method for each + <key, (list of values)> pair in the grouped inputs. + Output values must be of the same type as input values. Input keys must + not be altered. The framework will reuse the key and value objects + that are passed into the reduce, therefore the application should clone + the objects they want to keep a copy of. In many cases, all values are + combined into zero or one value. +

    + +

    Output pairs are collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the key. + @param values the list of values to reduce. + @param output to collect keys and combined values. + @param reporter facility to report progress.]]> +
    + + + The number of Reducers for the job is set by the user via + {@link JobConf#setNumReduceTasks(int)}. Reducer implementations + can access the {@link JobConf} for the job via the + {@link JobConfigurable#configure(JobConf)} method and initialize themselves. + Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    Reducer has 3 primary phases:

    +
      +
    1. + +

      Shuffle

      + +

Reducer is input the grouped output of a {@link Mapper}. + In this phase the framework, for each Reducer, fetches the + relevant partition of the output of all the Mappers, via HTTP.

      +
    2. + +
    3. +

      Sort

      + +

      The framework groups Reducer inputs by keys + (since different Mappers may have output the same key) in this + stage.

      + +

      The shuffle and sort phases occur simultaneously i.e. while outputs are + being fetched they are merged.

      + +
      SecondarySort
      + +

If equivalence rules for keys while grouping the intermediates are + different from those for grouping keys before reduction, then one may + specify a Comparator via + {@link JobConf#setOutputValueGroupingComparator(Class)}. Since + {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to + control how intermediate keys are grouped, these can be used in conjunction + to simulate secondary sort on values.

      + + + For example, say that you want to find duplicate web pages and tag them + all with the url of the "best" known example. You would set up the job + like: +
        +
• Map Input Key: url
• Map Input Value: document
• Map Output Key: document checksum, url pagerank
• Map Output Value: url
• Partitioner: by checksum
• OutputKeyComparator: by checksum and then decreasing pagerank
• OutputValueGroupingComparator: by checksum
      +
3.

      Reduce

      + +

      In this phase the + {@link #reduce(Object, Iterator, OutputCollector, Reporter)} + method is called for each <key, (list of values)> pair in + the grouped inputs.

      +

      The output of the reduce task is typically written to the + {@link FileSystem} via + {@link OutputCollector#collect(Object, Object)}.

      +
    + +

    The output of the Reducer is not re-sorted.

    + +
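A minimal configuration sketch for the secondary-sort recipe above; the ChecksumPartitioner, ChecksumRankComparator and ChecksumComparator classes are hypothetical application-supplied implementations, not Hadoop classes.
+     JobConf job = new JobConf(conf, MyJob.class);
+     job.setPartitionerClass(ChecksumPartitioner.class);             // partition by checksum
+     job.setOutputKeyComparatorClass(ChecksumRankComparator.class);  // checksum, then decreasing pagerank
+     job.setOutputValueGroupingComparator(ChecksumComparator.class); // group reduce input by checksum only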

    Example:

    +

    +     public class MyReducer<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Reducer<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +        
    +       private String reduceTaskId;
    +       private int noKeys = 0;
    +       
    +       public void configure(JobConf job) {
    +         reduceTaskId = job.get("mapred.task.id");
    +       }
    +       
    +       public void reduce(K key, Iterator<V> values,
    +                          OutputCollector<K, V> output, 
    +                          Reporter reporter)
    +       throws IOException {
    +       
    +         // Process
    +         int noValues = 0;
    +         while (values.hasNext()) {
    +           V value = values.next();
    +           
    +           // Increment the no. of values for this key
    +           ++noValues;
    +           
    +           // Process the <key, value> pair (assume this takes a while)
    +           // ...
    +           // ...
    +           
    +           // Let the framework know that we are alive, and kicking!
    +           if ((noValues%10) == 0) {
    +             reporter.progress();
    +           }
    +         
    +           // Process some more
    +           // ...
    +           // ...
    +           
    +           // Output the <key, value> 
    +           output.collect(key, value);
    +         }
    +         
    +         // Increment the no. of <key, list of values> pairs processed
    +         ++noKeys;
    +         
    +         // Increment counters
+         reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
    +         
    +         // Every 100 keys update application-level status
    +         if ((noKeys%100) == 0) {
    +           reporter.setStatus(reduceTaskId + " processed " + noKeys);
    +         }
    +       }
    +     }
    + 

    + + @see Mapper + @see Partitioner + @see Reporter + @see MapReduceBase]]> +
    +
+ + + + + + + + + + + + + + + Enum. + @param amount A non-negative amount by which the counter is to + be incremented.]]> + + + + + + + + + + + + + + InputSplit that the map is reading from. + @throws UnsupportedOperationException if called outside a mapper]]> + + + + + + + + + {@link Mapper} and {@link Reducer} can use the Reporter + provided to report progress or just indicate that they are alive. In + scenarios where the application takes a significant amount of time to + process individual key/value pairs, this is crucial since the framework + might assume that the task has timed-out and kill that task. + +

Applications can also update {@link Counters} via the provided + Reporter.

    + + @see Progressable + @see Counters]]> +
    +
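A short sketch, assuming a Mapper<K, V, K, V> implementation with an application-defined MyCounters enum, of the reporting calls described above; the status text is purely illustrative.
+     public void map(K key, V value,
+                     OutputCollector<K, V> output, Reporter reporter)
+     throws IOException {
+       // ... potentially slow per-record work ...
+       reporter.progress();                              // just prove the task is alive
+       reporter.incrCounter(MyCounters.NUM_RECORDS, 1);  // update an application counter
+       reporter.setStatus("processing " + key);          // human-readable task status
+       output.collect(key, value);
+     }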
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RunningJob is the user-interface to query for details on a + running Map-Reduce job. + +

    Clients can get hold of RunningJob via the {@link JobClient} + and then query the running-job for details such as name, configuration, + progress etc.

    + + @see JobClient]]> +
    +
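A polling sketch using only the RunningJob methods listed above; error handling is omitted and the surrounding method is assumed to declare throws Exception.
+     JobClient client = new JobClient(job);
+     RunningJob running = client.submitJob(job);
+     while (!running.isComplete()) {
+       System.out.println("map " + running.mapProgress()
+           + " reduce " + running.reduceProgress());
+       Thread.sleep(5000);                     // poll every five seconds
+     }
+     if (!running.isSuccessful()) {
+       System.err.println("Job failed");
+     }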
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This allows the user to specify the key class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + This allows the user to specify the value class to be different + from the actual class ({@link BytesWritable}) used for writing

+ + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output value class.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f. The filtering criteria is + MD5(key) % f == 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + f using + the criteria record# % f == 0. + For example, if the frequency is 10, one out of 10 records is returned.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . + @param name The name of the server + @param port The port to use on the server + @param findPort whether the server should start at the given port and + increment by 1 until it finds a free port.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + points to the log directory + "/static/" -> points to common static files (src/webapps/static) + "/" -> the jsp server code from (src/webapps/)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all task attempt IDs + of any jobtracker, in any job, of the first + map task, we would use : +
     
    + TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
    + 
    + which will return : +
     "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @param attemptId the task attempt number, or null + @return a regex pattern matching TaskAttemptIDs]]> +
    +
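A sketch applying the call shown above together with the forName factory mentioned in the class notes that follow; the sample attempt string is the one used in this documentation.
+     String regex = TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
+     // regex is now "attempt_[^_]*_[0-9]*_m_000001_[0-9]*"
+     TaskAttemptID attempt =
+         TaskAttemptID.forName("attempt_200707121733_0003_m_000005_0");
+     boolean matches = attempt.toString().matches(regex);  // false: task 000005, not 000001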
    + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the first map task + of any jobtracker, of any job, we would use : +

     
    + TaskID.getTaskIDsPattern(null, null, true, 1);
    + 
    + which will return : +
     "task_[^_]*_[0-9]*_m_000001*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @return a regex pattern matching TaskIDs]]> +
    + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.log.dir.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the Job was added.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([,]*) + func ::= tbl(,"") + class ::= @see java.lang.Class#forName(java.lang.String) + path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) + } + Reads expression from the mapred.join.expr property and + user-supplied join types from mapred.join.define.<ident> + types. Paths supplied to tbl are given as input paths to the + InputFormat class listed. + @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,

    ) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + mapred.join.define.<ident> to a classname. In the expression + mapred.join.expr, the identifier will be assumed to be a + ComposableRecordReader. + mapred.join.keycomparator can be a classname used to compare keys + in the join. + @see JoinRecordReader + @see MultiFilterRecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + capacity children to position + id in the parent reader. + The id of a root CompositeRecordReader is -1 by convention, but relying + on this is not recommended.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + override(S1,S2,S3) will prefer values + from S3 over S2, and values from S2 over S1 for all keys + emitted from all sources.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [,,...,]]]> + + + + + + + out. + TupleWritable format: + {@code + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used instead of the default implementation, + @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + bound in order to improve throughput. +

    + Map implementations using this MapRunnable must be thread-safe. +

+ The Map-Reduce job has to be configured to use this MapRunnable class (using + the JobConf.setMapRunnerClass method), and + the number of threads the thread-pool can use is set with the + mapred.map.multithreadedrunner.threads property; its default + value is 10 threads. +
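A configuration sketch of the two settings just described; the class lives in org.apache.hadoop.mapred.lib, and the thread count of 20 is an arbitrary illustration.
+     JobConf job = new JobConf(conf, MyJob.class);
+     job.setMapRunnerClass(MultithreadedMapRunner.class);
+     job.setInt("mapred.map.multithreadedrunner.threads", 20);  // default is 10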

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + pairs. Uses + {@link StringTokenizer} to break text into tokens.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + generateKeyValPairs(Object key, Object value); public void + configure(JobConfjob); } + + The package also provides a base class, ValueAggregatorBaseDescriptor, + implementing the above interface. The user can extend the base class and + implement generateKeyValPairs accordingly. + + The primary work of generateKeyValPairs is to emit one or more key/value + pairs based on the input key/value pair. The key in an output key/value pair + encode two pieces of information: aggregation type and aggregation id. The + value will be aggregated onto the aggregation id according the aggregation + type. + + This class offers a function to generate a map/reduce job using Aggregate + framework. The function takes the following parameters: input directory spec + input format (text or sequence file) output directory a file specifying the + user plugin class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When constructing the instance, if the factory property + contextName.class exists, + its value is taken to be the name of the class to instantiate. Otherwise, + the default is to create an instance of + org.apache.hadoop.metrics.spi.NullContext, which is a + dummy "no-op" context which will cause all metric data to be discarded. + + @param contextName the name of the context + @return the named MetricsContext]]> + + + + + + + + + + + + + + When the instance is constructed, this method checks if the file + hadoop-metrics.properties exists on the class path. If it + exists, it must be in the format defined by java.util.Properties, and all + the properties in the file are set as attributes on the newly created + ContextFactory instance. 
+ + @return the singleton ContextFactory instance]]> + + + + getFactory() method.]]> + + + + + + + + + + + + + + + + + + + startMonitoring() again after calling + this. + @see #close()]]> + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A record name identifies the kind of data to be reported. For example, a + program reporting statistics relating to the disks on a computer might use + a record name "diskStats".

    + + A record has zero or more tags. A tag has a name and a value. To + continue the example, the "diskStats" record might use a tag named + "diskName" to identify a particular disk. Sometimes it is useful to have + more than one tag, so there might also be a "diskType" with value "ide" or + "scsi" or whatever.

    + + A record also has zero or more metrics. These are the named + values that are to be reported to the metrics system. In the "diskStats" + example, possible metric names would be "diskPercentFull", "diskPercentBusy", + "kbReadPerSecond", etc.

    + + The general procedure for using a MetricsRecord is to fill in its tag and + metric values, and then call update() to pass the record to the + client library. + Metric data is not immediately sent to the metrics system + each time that update() is called. + An internal table is maintained, identified by the record name. This + table has columns + corresponding to the tag and the metric names, and rows + corresponding to each unique set of tag values. An update + either modifies an existing row in the table, or adds a new row with a set of + tag values that are different from all the other rows. Note that if there + are no tags, then there can be at most one row in the table.

    + + Once a row is added to the table, its data will be sent to the metrics system + on every timer period, whether or not it has been updated since the previous + timer period. If this is inappropriate, for example if metrics were being + reported by some transient object in an application, the remove() + method can be used to remove the row and thus stop the data from being + sent.

    + + Note that the update() method is atomic. This means that it is + safe for different threads to be updating the same metric. More precisely, + it is OK for different threads to call update() on MetricsRecord instances + with the same set of tag names and tag values. Different threads should + not use the same MetricsRecord instance at the same time.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MetricsContext.registerUpdater().]]> + + + + + + + + + + + + + + + + + + + + + + + + + fileName attribute, + if specified. Otherwise the data will be written to standard + output.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is configured by setting ContextFactory attributes which in turn + are usually configured through a properties file. All the attributes are + prefixed by the contextName. For example, the properties file might contain: +

    + myContextName.fileName=/tmp/metrics.log
    + myContextName.period=5
    + 
    ]]> +
    +
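A sketch tying the record/tag/metric model described above to the myContextName properties shown; MetricsUtil.getContext is assumed here as the usual convenience entry point, and the tag and metric names are illustrative.
+     MetricsContext context = MetricsUtil.getContext("myContextName");
+     MetricsRecord diskStats = context.createRecord("diskStats");
+     diskStats.setTag("diskName", "sda");
+     diskStats.setMetric("diskPercentFull", 87);
+     diskStats.update();   // queued; emitted on each timer period until remove() is called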
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + contextName.tableName. The returned map consists of + those attributes with the contextName and tableName stripped off.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class implements the internal table of metric data, and the timer + on which data is to be sent to the metrics system. Subclasses must + override the abstract emitRecord method in order to transmit + the data.

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + update + and remove().]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + ,name=" + Where the and are the supplied parameters + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.rpc.socket.factory.class.<ClassName>. When no + such parameter exists then fall back on the default socket factory as + configured by hadoop.rpc.socket.factory.class.default. If + this default socket factory is not configured, then fall back on the JVM + default socket factory. + + @param conf the configuration + @param clazz the class (usually a {@link VersionedProtocol}) + @return a socket factory]]> + + + + + + hadoop.rpc.socket.factory.default + + @param conf the configuration + @return the default socket factory as specified in the configuration or + the JVM default socket factory if the configuration does not + contain a default socket factory property.]]> + + + + + + + + + + + + + : + ://:/]]> + + + + + + + + : + ://:/]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + From documentation for {@link #getInputStream(Socket, long)}:
    + Returns InputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketInputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getInputStream()} is returned. In the later + case, the timeout argument is ignored and the timeout set with + {@link Socket#setSoTimeout(int)} applies for reads.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see #getInputStream(Socket, long) + + @param socket + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + +
    + + From documentation for {@link #getOutputStream(Socket, long)} :
    + Returns OutputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketOutputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getOutputStream()} is returned. In the later + case, the timeout argument is ignored and the write will wait until + data is available.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see #getOutputStream(Socket, long) + + @param socket + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    +
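A sketch of the stream helpers described above, combined with the configured socket factory mentioned earlier; the host name, port and timeout values are placeholders.
+     SocketFactory factory = NetUtils.getDefaultSocketFactory(conf);
+     Socket socket = factory.createSocket();
+     socket.connect(new InetSocketAddress("namenode.example.com", 8020), 10000);
+     InputStream in   = NetUtils.getInputStream(socket, 10000);   // 10s read timeout
+     OutputStream out = NetUtils.getOutputStream(socket, 10000);  // 10s write timeout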
    + + + + + + + + + + + + + + + + + + + + + node + + @param node + a node + @return true if node is already in the tree; false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + scope + if scope starts with ~, choose one from the all nodes except for the + ones in scope; otherwise, choose one from scope + @param scope range of nodes from which a node will be choosen + @return the choosen node]]> + + + + + + + scope but not in excludedNodes + if scope starts with ~, return the number of nodes that are not + in scope and excludedNodes; + @param scope a path string that may start with ~ + @param excludedNodes a list of nodes + @return number of available nodes]]> + + + + + + + + + + + + reader + It linearly scans the array, if a local node is found, swap it with + the first element of the array. + If a local rack node is found, swap it with the first element following + the local node. + If neither local node or local rack node is found, put a random replica + location at postion 0. + It leaves the rest nodes untouched.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout timeout in milliseconds. must not be negative. + @throws IOException]]> +
    +
    + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + +
    + + Create a new ouput stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketOutputStream#SocketOutputStream(WritableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout timeout in milliseconds. must not be negative. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = getCount(). + @param newCapacity The new capacity in bytes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index idx = startVector(...); + while (!idx.done()) { + .... // read element of a vector + idx.incr(); + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This task takes the given record definition files and compiles them into + java or c++ + files. It is then up to the user to compile the generated files. + +

The task requires the file or the nested fileset element to be + specified. Optional attributes are language (set the output + language, default is "java"), + destdir (name of the destination directory for generated java/c++ + code, default is ".") and failonerror (specifies error handling + behavior; default is true). +

    Usage

    +
    + <recordcc
    +       destdir="${basedir}/gensrc"
    +       language="java">
    +   <fileset include="**\/*.jr" />
    + </recordcc>
    + 
    ]]> +
    +
    + +
    + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi as a comma separated string in + conf as a property attr + + The String starts with the user name followed by the default group names, + and other group names. + + @param conf configuration + @param attr property name + @param ugi a UnixUserGroupInformation]]> + + + + + + + + conf + + The object is expected to store with the property name attr + as a comma separated string that starts + with the user name followed by group names. + If the property name is not defined, return null. + It's assumed that there is only one UGI per user. If this user already + has a UGI in the ugi map, return the ugi in the map. + Otherwise, construct a UGI from the configuration, store it in the + ugi map and return it. + + @param conf configuration + @param attr property name + @return a UnixUGI + @throws LoginException if the stored string is ill-formatted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This tool supports archiving and anaylzing (sort/grep) of log-files. + It takes as input + a) Input uri which will serve uris of the logs to be archived. + b) Output directory (not mandatory). + b) Directory on dfs to archive the logs. + c) The sort/grep patterns for analyzing the files and separator for boundaries. + Usage: + Logalyzer -archive -archiveDir -analysis -logs -grep -sort -separator +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GenericOptionsParser to parse only the generic Hadoop + arguments. + + The array of string arguments other than the generic arguments can be + obtained by {@link #getRemainingArgs()}. + + @param conf the Configuration to modify. + @param args command-line arguments.]]> + + + + + GenericOptionsParser to parse given options as well + as generic Hadoop options. + + The resulting CommandLine object can be obtained by + {@link #getCommandLine()}. + + @param conf the configuration to modify + @param options options built by the caller + @param args User-specified arguments]]> + + + + + Strings containing the un-parsed arguments + or empty array if commandLine was not defined.]]> + + + + + CommandLine object + to process the parsed arguments. + + Note: If the object is created with + {@link #GenericOptionsParser(Configuration, String[])}, then returned + object will only contain parsed generic options. + + @return CommandLine representing list of arguments + parsed against Options descriptor.]]> + + + + + + + + + + GenericOptionsParser is a utility to parse command line + arguments generic to the Hadoop framework. + + GenericOptionsParser recognizes several standarad command + line arguments, enabling applications to easily specify a namenode, a + jobtracker, additional configuration resources etc. + +

    Generic Options

    + +

    The supported generic options are:

    +

    +     -conf <configuration file>     specify a configuration file
    +     -D <property=value>            use value for given property
    +     -fs <local|namenode:port>      specify a namenode
    +     -jt <local|jobtracker:port>    specify a job tracker
    +     -files <comma separated list of files>    specify comma separated
    +                            files to be copied to the map reduce cluster
    +     -libjars <comma separated list of jars>   specify comma separated
    +                            jar files to include in the classpath.
    +     -archives <comma separated list of archives>    specify comma
    +             separated archives to be unarchived on the compute machines.
    +
    + 

    + +

    The general command line syntax is:

    +

    + bin/hadoop command [genericOptions] [commandOptions]
    + 

    + +

Generic command line arguments might modify the + Configuration objects passed to constructors.

    + +

    The functionality is implemented using Commons CLI.

    + +

    Examples:

    +

    + $ bin/hadoop dfs -fs darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    + 
    + $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    +     
    + $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
    + list /data directory in dfs with conf specified in hadoop-site.xml
    +     
    + $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt local -submit job.xml
    + submit a job to local runner
    + 
    + $ bin/hadoop jar -libjars testlib.jar 
    + -archives test.tgz -files file.txt inputjar args
    + job submission with libjars, files and archives
    + 

    + + @see Tool + @see ToolRunner]]> +
    +
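A sketch of the parsing flow described above, using only the constructor and the getRemainingArgs accessor covered in this section.
+     public static void main(String[] args) throws Exception {
+       Configuration conf = new Configuration();
+       // Consumes -D, -fs, -jt, -files, -libjars and -archives, applying them to conf
+       GenericOptionsParser parser = new GenericOptionsParser(conf, args);
+       String[] appArgs = parser.getRemainingArgs();
+       // appArgs now holds only the application-specific arguments
+     }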
    + + + + + + + + + Class<T>) of the + argument of type T. + @param The type of the argument + @param t the object to get it class + @return Class<T>]]> + + + + + + + List<T> to a an array of + T[]. + @param c the Class object of the items in the list + @param list the list to convert]]> + + + + + + List<T> to a an array of + T[]. + @param list the list to convert + @throws ArrayIndexOutOfBoundsException if the list is empty. + Use {@link #toArray(Class, List)} if the list may be empty.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-hadoop is loaded, + else false]]> + + + + + + true if native hadoop libraries, if present, can be + used for this job; false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { pq.top().change(); pq.adjustTop(); } + instead of
    +  { o = pq.pop(); o.change(); pq.push(o); }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take an insignificant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occured and time-out the operation.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop Pipes + or Hadoop Streaming. + + It also checks to ensure that we are running on a *nix platform else + (e.g. in Cygwin/Windows) it returns null. + @param job job configuration + @return a String[] with the ulimit command arguments or + null if we are running on a non *nix platform or + if the limit is unspecified.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell can be used to run unix commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShellCommandExecutorshould be used in cases where the output + of the command needs no explicit parsing and where the command, working + directory and the environment remains unchanged. The output of the command + is stored as-is and is expected to be small.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ArrayList of string values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the char to be escaped + @return an escaped string]]> + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the escaped char + @return an unescaped string]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyApp.MyMapper.class);
    +         job.setReducerClass(MyApp.MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
+         JobClient.runJob(job);
+         return 0;
+       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
+         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> +
    +
    + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    diff --git a/aarch64/share/hadoop/common/jdiff/hadoop_0.18.3.xml b/aarch64/share/hadoop/common/jdiff/hadoop_0.18.3.xml new file mode 100644 index 0000000..564916f --- /dev/null +++ b/aarch64/share/hadoop/common/jdiff/hadoop_0.18.3.xml @@ -0,0 +1,38826 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + name property, null if + no such property exists. + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + name property, without doing + variable expansion. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + + value of the name property. + + @param name property name. + @param value property value.]]> + + + + + + + name property. If no such property + exists, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, or if the specified value is not a valid + int, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as an int, + or defaultValue.]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property is specified, or if the specified value is not a valid + long, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property is specified, or if the specified value is not a valid + float, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a float, + or defaultValue.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

    + This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Hadoop by default specifies two resources, loaded in-order from the + classpath:

      +
1. hadoop-default.xml: Read-only defaults for hadoop.
2. hadoop-site.xml: Site-specific configuration for a given hadoop + installation.
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    +  <property>
    +    <name>dfs.client.buffer.dir</name>
    +    <value>/tmp/hadoop/dfs/client</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + hadoop-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
1. Other properties defined in this Configuration; and, if a name is + undefined here,
2. Properties in {@link System#getProperties()}.
    + +

    For example, if a configuration resource contains the following property + definitions: +

    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    + + When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name.]]> +
    +
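A sketch of the expansion behaviour just described, assuming the two property definitions above were saved in a resource named my-conf.xml (a hypothetical file on the classpath).
+     Configuration conf = new Configuration();
+     conf.addResource("my-conf.xml");       // defines basedir and tempdir as shown above
+     String tempdir = conf.get("tempdir");
+     // ${basedir} expands to "/user/${user.name}", and ${user.name} is then
+     // resolved against the System property of the same name.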
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DistributedCache is a facility provided by the Map-Reduce + framework to cache files (text, archives, jars etc.) needed by applications. +

    + +

    Applications specify the files, via urls (hdfs:// or http://) to be cached + via the {@link JobConf}. The DistributedCache assumes that the + files specified via hdfs:// urls are already present on the + {@link FileSystem} at the path specified by the url.

    + +

The framework will copy the necessary files onto the slave node before + any tasks for the job are executed on that node. Its efficiency stems from + the fact that the files are only copied once per job, and from the ability to + cache archives which are un-archived on the slaves.

    + +

    DistributedCache can be used to distribute simple, read-only + data/text files and/or more complex types such as archives, jars etc. + Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. + Jars may be optionally added to the classpath of the tasks, a rudimentary + software distribution mechanism. Files have execution permissions. + Optionally users can also direct it to symlink the distributed cache file(s) + into the working directory of the task.

    + +

    DistributedCache tracks modification timestamps of the cache + files. Clearly the cache files should not be modified by the application + or externally while the job is executing.

    + +

    Here is an illustrative example on how to use the + DistributedCache:

    +

    +     // Setting up the cache for the application
    +     
    +     1. Copy the requisite files to the FileSystem:
    +     
    +     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
    +     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
    +     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
    +     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
    +     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
    +     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
    +     
    +     2. Setup the application's JobConf:
    +     
    +     JobConf job = new JobConf();
    +     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
    +                                   job);
+     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
    +     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
    +     
    +     3. Use the cached files in the {@link Mapper} or {@link Reducer}:
    +     
    +     public static class MapClass extends MapReduceBase  
    +     implements Mapper<K, V, K, V> {
    +     
    +       private Path[] localArchives;
    +       private Path[] localFiles;
    +       
    +       public void configure(JobConf job) {
    +         // Get the cached archives/files
    +         localArchives = DistributedCache.getLocalCacheArchives(job);
    +         localFiles = DistributedCache.getLocalCacheFiles(job);
    +       }
    +       
    +       public void map(K key, V value, 
    +                       OutputCollector<K, V> output, Reporter reporter) 
    +       throws IOException {
    +         // Use data from the cached archives/files here
    +         // ...
    +         // ...
    +         output.collect(k, v);
    +       }
    +     }
    +     
    + 

    + + @see JobConf + @see JobClient]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BufferedFSInputStream + with the specified buffer size, + and saves its argument, the input stream + in, for later use. An internal + buffer array of length size + is created and stored in buf. + + @param in the underlying input stream. + @param size the buffer size. + @exception IllegalArgumentException if size <= 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ']]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

?             Matches any single character.

*             Matches zero or more characters.

[abc]         Matches a single character from character set {a,b,c}.

[a-b]         Matches a single character from the character range {a...b}. Note that character a must be lexicographically less than or equal to character b.

[^a]          Matches a single character that is not from character set or range {a}. Note that the ^ character must occur immediately to the right of the opening bracket.

\c            Removes (escapes) any special meaning of character c.

{ab,cd}       Matches a string from the string set {ab, cd}

{ab,c{de,fh}} Matches a string from the string set {ab, cde, cfh}
+ + @param pathPattern a regular expression specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException]]> +
    +
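As a hedged illustration of the pattern matching described above (assuming the globStatus variant of this API available on FileSystem, and an invented /logs directory layout):

+     FileSystem fs = FileSystem.get(conf);
+     // Match every .log file in any directory named 2014-* under /logs.
+     FileStatus[] matches = fs.globStatus(new Path("/logs/2014-*/*.log"));
+     for (FileStatus status : matches) {
+       System.out.println(status.getPath());
+     }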
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object. The + Hadoop DFS is a multi-machine system that appears as a single + disk. It's useful because of its fault tolerance and potentially + very large capacity. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is {@link DistributedFileSystem}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + buf at offset + and checksum into checksum. + The method is used for implementing read, therefore, it should be optimized + for sequential reading + @param pos chunkPos + @param buf desitination buffer + @param offset offset in buf at which to store data + @param len maximun number of bytes to read + @return number of bytes read]]> + + + + + + + + + + + + + + + + + -1 if the end of the + stream is reached. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + This method implements the general contract of the corresponding + {@link InputStream#read(byte[], int, int) read} method of + the {@link InputStream} class. As an additional + convenience, it attempts to read as many bytes as possible by repeatedly + invoking the read method of the underlying stream. This + iterated read continues until one of the following + conditions becomes true:

      + +
• The specified number of bytes have been read,
• The read method of the underlying stream returns -1, indicating end-of-file.
    If the first read on the underlying stream returns + -1 to indicate end-of-file then this method returns + -1. Otherwise this method returns the number of bytes + actually read. + + @param b destination buffer. + @param off offset at which to start storing bytes. + @param len maximum number of bytes to read. + @return the number of bytes read, or -1 if the end of + the stream has been reached. + @exception IOException if an I/O error occurs. + ChecksumException if any checksum error occurs]]> +
    + + + + + + + + + + + + n bytes of data from the + input stream. + +

    This method may skip more bytes than are remaining in the backing + file. This produces no exception and the number of bytes skipped + may include some number of bytes that were beyond the EOF of the + backing file. Attempting to read from the stream after skipping past + the end will result in -1 indicating the end of the file. + +

    If n is negative, no bytes are skipped. + + @param n the number of bytes to be skipped. + @return the actual number of bytes skipped. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to skip to is corrupted]]> + + + + + + + This method may seek past the end of the file. + This produces no exception and an attempt to read from + the stream will result in -1 indicating the end of the file. + + @param pos the postion to seek to. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to seek to is corrupted]]> + + + + + + + + + + len bytes from + stm + + @param stm an input stream + @param buf destiniation buffer + @param offset offset at which to store data + @param len number of bytes to read + @return actual number of bytes read + @throws IOException if there is any IO error]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + len bytes from the specified byte array + starting at offset off and generate a checksum for + each data chunk. + +

    This method stores bytes from the given array into this + stream's buffer before it gets checksumed. The buffer gets checksumed + and flushed to the underlying output stream when all data + in a checksum chunk are in the buffer. If the buffer is empty and + requested length is at least as large as the size of next checksum chunk + size, this method will checksum and write the chunk directly + to the underlying output stream. Thus it avoids uneccessary data copy. + + @param b the data. + @param off the start offset in the data. + @param len the number of bytes to write. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trash feature. Files are moved to a user's trash + directory, a subdirectory of their home directory named ".Trash". Files are + initially moved to a current sub-directory of the trash directory. + Within that sub-directory their original path is preserved. Periodically + one may checkpoint the current trash and remove older checkpoints. (This + design permits trash management without enumeration of the full trash + content, without date support in the filesystem, and without clock + synchronization.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a tool for migrating data from an older to a newer version + of an S3 filesystem. +

    +

    + All files in the filesystem are migrated by re-writing the block metadata + - no datafiles are touched. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + Extracts AWS credentials from the filesystem URI or configuration. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A block-based {@link FileSystem} backed by + Amazon S3. +

    + @see NativeS3FileSystem]]> +
    +
    + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If f is a file, this method will make a single call to S3. + If f is a directory, this method will make a maximum of + (n / 1000) + 2 calls to S3, where n is the total number of + files and directories contained directly in f. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} for reading and writing files stored on + Amazon S3. + Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation + stores files on S3 in their + native form so they can be read by other S3 tools. +

    + @see org.apache.hadoop.fs.s3.S3FileSystem]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nth value.]]> + + + + + + + + + + + + + + + + + + + + + nth value in the file.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataInputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + DataInputBuffer buffer = new DataInputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using DataInput methods ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataOutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + DataOutputBuffer buffer = new DataOutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using DataOutput methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

    + Compared with ObjectWritable, this class is much more effective, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines the classes that will be wrapped in GenericObject in the application. Attention: the classes defined in the getTypes() method must implement the Writable interface.

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new InputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + InputBuffer buffer = new InputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using InputStream methods ...
    + }
    + 
    + @see DataInputBuffer + @see DataOutput]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link IOException} or + null pointers. Must only be used for cleanup in exception handlers. + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key and + val. Returns true if such a pair exists and false when at + the end of the map]]> + + + + + + + + + + + + + + + + key or if it does not exist, at the first entry + after the named key. + +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof.]]> + + + + + + + + + key does not exist, return + the first entry that falls just before the key. Otherwise, + return the record that sorts just after. + @return - the key that was the closest match or null if eof.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new OutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + OutputBuffer buffer = new OutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using OutputStream methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    + @see DataOutputBuffer + @see InputBuffer]]> +
    +
    + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link Writer}, {@link Reader} and + {@link Sorter} classes for writing, reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
1. Writer: Uncompressed records.
2. RecordCompressWriter: Record-compressed files, only compress values.
3. BlockCompressWriter: Block-compressed files, both keys & values are collected in 'blocks' separately and compressed. The size of the 'block' is configurable.

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

The recommended way is to use the static createWriter methods + provided by the SequenceFile to choose the preferred format.

    + +

    The {@link Reader} acts as the bridge and can read any of the above + SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
• version - 3 bytes of magic header SEQ, followed by 1 byte of actual version number (e.g. SEQ4 or SEQ6)
• keyClassName - key class
• valueClassName - value class
• compression - A boolean which specifies if compression is turned on for keys/values in this file.
• blockCompression - A boolean which specifies if block-compression is turned on for keys/values in this file.
• compression codec - CompressionCodec class which is used for compression of keys and/or values (if compression is enabled).
• metadata - {@link Metadata} for this file.
• sync - A sync marker to denote end of the header.
    + +
    Uncompressed SequenceFile Format
• Header
• Record
  • Record length
  • Key length
  • Key
  • Value
• A sync-marker every few 100 bytes or so.
    + +
    Record-Compressed SequenceFile Format
• Header
• Record
  • Record length
  • Key length
  • Key
  • Compressed Value
• A sync-marker every few 100 bytes or so.
    + +
    Block-Compressed SequenceFile Format
• Header
• Record Block
  • Compressed key-lengths block-size
  • Compressed key-lengths block
  • Compressed keys block-size
  • Compressed keys block
  • Compressed value-lengths block-size
  • Compressed value-lengths block
  • Compressed values block-size
  • Compressed values block
• A sync-marker every few 100 bytes or so.
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
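A brief sketch of the recommended createWriter/Reader usage (the file name and record types below are illustrative only):

+     Configuration conf = new Configuration();
+     FileSystem fs = FileSystem.get(conf);
+     Path file = new Path("/tmp/example.seq");
+     
+     // Write a few key/value records.
+     SequenceFile.Writer writer =
+         SequenceFile.createWriter(fs, conf, file, IntWritable.class, Text.class);
+     try {
+       writer.append(new IntWritable(1), new Text("one"));
+       writer.append(new IntWritable(2), new Text("two"));
+     } finally {
+       writer.close();
+     }
+     
+     // The Reader detects whether the file is uncompressed, record- or block-compressed.
+     SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
+     try {
+       IntWritable key = new IntWritable();
+       Text value = new Text();
+       while (reader.next(key, value)) {
+         System.out.println(key + "\t" + value);
+       }
+     } finally {
+       reader.close();
+     }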
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key, skipping its + value. True if another entry exists, and false at end of file.]]> + + + + + + + + key and + val. Returns true if such a pair exists and false when at + end of file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The position passed must be a position returned by {@link + SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + position, use {@link SequenceFile.Reader#sync(long)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SegmentDescriptor + @param segments the list of SegmentDescriptors + @param tmpDir the directory to write temporary files into + @return RawKeyValueIterator + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For best performance, applications should make sure that the {@link + Writable#readFields(DataInput)} implementation of their keys is + very efficient. In particular, it should avoid allocating memory.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This always returns a synchronized position. In other words, + immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + the key may be earlier in the file than key last written when this + method was called (e.g., with block-compression, it may be the first key + in the block that was being written when this method was called).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key. Returns + true if such a key exists and false when at the end of the set.]]> + + + + + + + key. + Returns key, or null if no match exists.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instatiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. 
+ @return byte position of the first occurence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a UTF8 with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + Also includes utilities for efficiently reading and writing UTF-8. + + @deprecated replaced by Text]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

    + + @param in DataInput to deseriablize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +

    +     public class MyWritable implements Writable {
    +       // Some data     
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 

    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Example:

    +

    +     public class MyWritableComparable implements WritableComparable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable w) {
+         int thisValue = this.counter;
+         int thatValue = w.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +     }
    + 

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implemenation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. + @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-lzo library is loaded & initialized; + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lzo compression/decompression pair. 
+ http://www.oberhumer.com/opensource/lzo/]]> + + + + + + + + + + + + + + + + + + + + + true if lzo compressors are loaded & initialized, + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if lzo decompressors are loaded & initialized, + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-zlib is loaded & initialized + and can be loaded for this job, else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Keep trying a limited number of times, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    + + + + + + + + Keep trying for a maximum time, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
    + + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime mutliplied by the number of tries so far. +

    ]]> +
    +
    + + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime mutliplied by a random + number in the range of [0, 2 to the number of retries) +

    ]]> +
    +
    + + + + + + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + + + A retry policy for RemoteException + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + Try once, and fail by re-throwing the exception. + This corresponds to having no retry mechanism in place. +

    ]]> +
    +
    + + + + Try once, and fail silently for void methods, or by + re-throwing the exception for non-void methods. +

    ]]> +
    +
    + + + + Keep trying forever. +

    ]]> +
    +
    + + + A collection of useful implementations of {@link RetryPolicy}. +

    ]]> +
    +
    + + + + + + + + + + Determines whether the framework should retry a + method for the given exception, and the number + of retries that have been made for that operation + so far. +

    + @param e The exception that caused the method to fail. + @param retries The number of times the method has been retried. + @return true if the method should be retried, + false if the method should not be retried + but shouldn't fail with an exception (only for void methods). + @throws Exception The re-thrown exception e indicating + that the method failed and should not be retried further.]]> +
    +
    + + + Specifies a policy for retrying method failures. + Implementations of this interface should be immutable. +

    ]]> +
    +
    + + + + + + + + + + + + Create a proxy for an interface of an implementation class + using the same retry policy for each method in the interface. +

    + @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param retryPolicy the policy for retirying method call failures + @return the retry proxy]]> +
    +
    + + + + + + + Create a proxy for an interface of an implementation class + using the a set of retry policies specified by method name. + If no retry policy is defined for a method then a default of + {@link RetryPolicies#TRY_ONCE_THEN_FAIL} is used. +

    + @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param methodNameToPolicyMap a map of method names to retry policies + @return the retry proxy]]> +
    +
    + + + A factory for creating retry proxies. +

    ]]> +
    +
    + +
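A hedged sketch of wiring a policy to a proxy via RetryProxy, as described above (ClientProtocol and client are placeholders, not names taken from this API):

+     RetryPolicy policy =
+         RetryPolicies.retryUpToMaximumCountWithFixedSleep(5, 2, TimeUnit.SECONDS);
+     // Every method invoked on the returned proxy is retried according to the policy.
+     ClientProtocol retrying =
+         (ClientProtocol) RetryProxy.create(ClientProtocol.class, client, policy);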
    + + + + + + + + Prepare the deserializer for reading.

    ]]> +
    +
    + + + + + + Deserialize the next object from the underlying input stream. + If the object t is non-null then this deserializer + may set its internal state to the next object read from the input + stream. Otherwise, if the object t is null a new + deserialized object will be created. +

    + @return the deserialized object]]> +
    +
    + + + + Close the underlying input stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for deserializing objects of type from an + {@link InputStream}. +

    + +

    + Deserializers are stateful, but must not buffer the input since + other producers may read from the input between calls to + {@link #deserialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link Deserializer} to deserialize + the objects to be compared so that the standard {@link Comparator} can + be used to compare them. +

    +

    + One may optimize compare-intensive operations by using a custom + implementation of {@link RawComparator} that operates directly + on byte representations. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + Encapsulates a {@link Serializer}/{@link Deserializer} pair. +

    + @param ]]> +
    +
    + + + + + + + Serializations are found by reading the io.serializations + property from conf, which is a comma-delimited list of + classnames. +

    ]]> +
    +
    + + + + + + + + + + + + A factory for {@link Serialization}s. +

    ]]> +
    +
    + + + + + + + + Prepare the serializer for writing.

    ]]> +
    +
    + + + + + Serialize t to the underlying output stream.

    ]]> +
    +
    + + + + Close the underlying output stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for serializing objects of type to an + {@link OutputStream}. +

    + +

    + Serializers are stateful, but must not buffer the output since + other producers may write to the output between calls to + {@link #serialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + +
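For example, the io.serializations lookup described above can be configured explicitly; this sketch lists the built-in Writable serialization together with the optional Java serialization:

+     Configuration conf = new Configuration();
+     conf.set("io.serializations",
+              "org.apache.hadoop.io.serializer.WritableSerialization,"
+              + "org.apache.hadoop.io.serializer.JavaSerialization");
+     SerializationFactory factory = new SerializationFactory(conf);
+     Serializer<Text> serializer =
+         factory.getSerialization(Text.class).getSerializer(Text.class);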
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + param, to the IPC server running at + address, returning the value. Throws exceptions if there are + network problems or if the remote code threw an exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unwraps any IOException. + + @param lookupTypes the desired exception class. + @return IOException, which is either the lookupClass exception or this.]]> + + + + + This unwraps any Throwable that has a constructor taking + a String as a parameter. + Otherwise it returns this. + + @return Throwable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + protocol is a Java interface. All parameters and return types must + be one of: + +
• a primitive type, boolean, byte, char, short, int, long, float, double, or void; or
• a {@link String}; or
• a {@link Writable}; or
• an array of the above types
    + + All methods in the protocol should throw only IOException. No field data of + the protocol instance is transmitted.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + handlerCount determines + the number of handler threads that will be used to process calls.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
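A minimal sketch of a protocol interface that satisfies these constraints (the interface and method names are purely illustrative):

+     public interface PingProtocol {
+       // Only primitives, String, Writable and arrays of those types may appear.
+       Text echo(Text message) throws IOException;
+       long[] getTimestamps(String id) throws IOException;
+     }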
    + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #rpcQueueTime}.inc(time)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        rpc.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker, + as {@link JobTracker.State} + + @return the current state of the JobTracker.]]> + + + + + + + + + + + + ClusterStatus provides clients with information such as: +

      +
1. Size of the cluster.
2. Task capacity of the cluster.
3. The number of currently running map & reduce tasks.
4. State of the JobTracker.

    + +

    Clients can query for the latest ClusterStatus, via + {@link JobClient#getClusterStatus()}.
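A sketch of that query from a client (assuming a job configuration is already at hand):

+     JobClient client = new JobClient(new JobConf());
+     ClusterStatus status = client.getClusterStatus();
+     System.out.println("Task trackers: " + status.getTaskTrackers());
+     System.out.println("Running map tasks: " + status.getMapTasks());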

    + + @see JobClient]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter can be of + any {@link Enum} type.

    + +

    Counters are bunched into {@link Group}s, each comprising of + counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Group of counters, comprising of counters from a particular + counter {@link Enum} class. + +

Group handles localization of the class name and the counter names.

    ]]> +
    +
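A sketch of defining and reading such a counter (MyCounters is a hypothetical application enum):

+     enum MyCounters { RECORDS_SKIPPED }
+     
+     // Inside a task: reporter.incrCounter(MyCounters.RECORDS_SKIPPED, 1);
+     
+     // On the client, after the job completes:
+     RunningJob running = JobClient.runJob(job);   // job is the configured JobConf
+     Counters counters = running.getCounters();
+     long skipped = counters.getCounter(MyCounters.RECORDS_SKIPPED);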
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param fs the file system that the file is on + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobConf, int)}. + Subclasses of FileInputFormat can also override the + {@link #isSplitable(FileSystem, Path)} method to ensure input-files are + not split-up and are processed as a whole by {@link Mapper}s.]]> + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapred.output.dir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapred.output.dir}/_temporary/_${taskid} (only) + are promoted to ${mapred.output.dir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

+ The application-writer can take advantage of this by creating any
+ side-files required in ${mapred.work.output.dir} during execution
+ of the reduce-task, i.e. via {@link #getWorkOutputPath(JobConf)}, and the
+ framework will move them out similarly - thus the writer doesn't have to pick
+ unique paths per task-attempt.

    + +

    Note: the value of ${mapred.work.output.dir} during + execution of a particular task-attempt is actually + ${mapred.output.dir}/_temporary/_{$taskid}, and this value is + set by the map-reduce framework. So, just create any side-files in the + path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce + task to take advantage of this feature.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
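A hedged sketch of the side-file mechanism described above: a reducer that writes one extra file under the per-attempt work directory via FileOutputFormat.getWorkOutputPath. The key/value types, the side-file name and the file contents are illustrative assumptions.

    import java.io.IOException;
    import java.util.Iterator;

    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reducer;
    import org.apache.hadoop.mapred.Reporter;

    public class SideFileReducer extends MapReduceBase
        implements Reducer<Text, IntWritable, Text, IntWritable> {

      private JobConf conf;
      private boolean sideFileWritten = false;

      public void configure(JobConf job) {
        this.conf = job;
      }

      public void reduce(Text key, Iterator<IntWritable> values,
                         OutputCollector<Text, IntWritable> output,
                         Reporter reporter) throws IOException {
        int sum = 0;
        while (values.hasNext()) {
          sum += values.next().get();
        }
        output.collect(key, new IntWritable(sum));

        // Side-file: written into the per-attempt work directory; the framework
        // promotes it to ${mapred.output.dir} only if this attempt succeeds.
        if (!sideFileWritten) {
          Path workDir = FileOutputFormat.getWorkOutputPath(conf);
          Path sideFile = new Path(workDir, "summary-side-file");  // illustrative name
          FileSystem fs = sideFile.getFileSystem(conf);
          FSDataOutputStream out = fs.create(sideFile);
          out.writeBytes("side file written by " + conf.get("mapred.task.id") + "\n");
          out.close();
          sideFileWritten = true;
        }
      }
    }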
    +
    + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is used to validate the input directories when a job is + submitted so that the {@link JobClient} can fail early, with an useful + error message, in case of errors. For e.g. input directory does not exist. +

    + + @param job job configuration. + @throws InvalidInputException if the job does not have valid input + @deprecated getSplits is called in the client and can perform any + necessary validation of the input]]> +
    +
    + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

    Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For e.g. a split could + be <input-file-path, start, offset> tuple. + + @param job job configuration. + @param numSplits the desired number of splits, a hint. + @return an array of {@link InputSplit}s for the job.]]> + + + + + + + + + It is the responsibility of the RecordReader to respect + record boundaries while processing the logical split to present a + record-oriented view to the individual task.

    + + @param split the {@link InputSplit} + @param job the job that this split belongs to + @return a {@link RecordReader}]]> +
    +
    + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
+   1. Validate the input-specification of the job.
+   2. Split-up the input file(s) into logical {@link InputSplit}s, each of
+      which is then assigned to an individual {@link Mapper}.
+   3. Provide the {@link RecordReader} implementation to be used to glean
+      input records from the logical InputSplit for processing by
+      the {@link Mapper}.
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

+ Clearly, logical splits based on input-size are insufficient for many
+ applications since record boundaries are to be respected. In such cases, the
+ application also has to implement a {@link RecordReader}, on which lies the
+ responsibility to respect record-boundaries and present a record-oriented
+ view of the logical InputSplit to the individual task.
+
+ @see InputSplit
+ @see RecordReader
+ @see JobClient
+ @see FileInputFormat]]>
+
+ InputSplit.
+
+ @return the number of bytes in the input split.
+ @throws IOException]]>
+
+ InputSplit is located as an array of Strings.
+ @throws IOException]]>
+
+ InputSplit represents the data to be processed by an
+ individual {@link Mapper}.

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + jobid doesn't correspond to any known job. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient is the primary interface for the user-job to interact + with the {@link JobTracker}. + + JobClient provides facilities to submit jobs, track their + progress, access component-tasks' reports/logs, get the Map-Reduce cluster + status information etc. + +

    The job submission process involves: +

      +
+   1. Checking the input and output specifications of the job.
+   2. Computing the {@link InputSplit}s for the job.
+   3. Setting up the requisite accounting information for the
+      {@link DistributedCache} of the job, if necessary.
+   4. Copying the job's jar and configuration to the map-reduce system
+      directory on the distributed file-system.
+   5. Submitting the job to the JobTracker and optionally monitoring
+      its status.

    + + Normally the user creates the application, describes various facets of the + job via {@link JobConf} and then uses the JobClient to submit + the job and monitor its progress. + +

    Here is an example on how to use JobClient:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     job.setInputPath(new Path("in"));
    +     job.setOutputPath(new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +
    +     // Submit the job, then poll for progress until the job is complete
    +     JobClient.runJob(job);
    + 

    + +

    Job Control

    + +

    At times clients would chain map-reduce jobs to accomplish complex tasks + which cannot be done via a single map-reduce job. This is fairly easy since + the output of the job, typically, goes to distributed file-system and that + can be used as the input for the next job.

    + +

    However, this also means that the onus on ensuring jobs are complete + (success/failure) lies squarely on the clients. In such situations the + various job-control options are: +

      +
+   1. {@link #runJob(JobConf)} : submits the job and returns only after
+      the job has completed.
+   2. {@link #submitJob(JobConf)} : only submits the job, then polls the
+      returned handle to the {@link RunningJob} to query status and make
+      scheduling decisions.
+   3. {@link JobConf#setJobEndNotificationURI(String)} : sets up a notification
+      on job-completion, thus avoiding polling.

    + + @see JobConf + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
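A hedged sketch of the second job-control option listed above (submitJob plus polling); the wrapper class, the elided job setup and the polling interval are arbitrary choices, not prescribed by the documentation.

    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.RunningJob;

    public class SubmitAndPoll {
      public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SubmitAndPoll.class);
        // ... configure mapper, reducer, input and output paths ...

        JobClient client = new JobClient(conf);
        RunningJob job = client.submitJob(conf);   // returns immediately

        // Poll the handle until the job finishes, reporting progress.
        while (!job.isComplete()) {
          System.out.printf("map %.0f%%  reduce %.0f%%%n",
              job.mapProgress() * 100, job.reduceProgress() * 100);
          Thread.sleep(5000);   // arbitrary polling interval
        }
        System.out.println(job.isSuccessful() ? "Job succeeded" : "Job failed");
      }
    }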
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if framework should keep the intermediate files + for failed tasks, false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: +

    + @param dir the {@link Path} of the output directory for the map-reduce job.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the outputs of the maps are to be compressed, + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This comparator should be provided if the equivalence rules for keys + for sorting the intermediates are different from those for grouping keys + before each call to + {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.

    + +

    For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed + in a single call to the reduce function if K1 and K2 compare as equal.

    + +

    Since {@link #setOutputKeyComparatorClass(Class)} can be used to control + how keys are sorted, this can be used in conjunction to simulate + secondary sort on values.

    + +

    Note: This is not a guarantee of the reduce sort being + stable in any sense. (In any case, with the order of available + map-outputs to the reduce being non-deterministic, it wouldn't make + that much sense.)

    + + @param theClass the comparator class to be used for grouping keys. + It should implement RawComparator. + @see #setOutputKeyComparatorClass(Class)]]> +
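A minimal sketch of wiring up the grouping comparator described above for a secondary sort, assuming Text keys of the (hypothetical) form "primary#secondary"; the comparator class and key layout are assumptions, not the framework's prescribed recipe.

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.lib.HashPartitioner;

    public class SecondarySortConfig {

      // Hypothetical grouping comparator: compares only the part of the key
      // before the first '#', so "url#1" and "url#2" share one reduce() call.
      public static class PrimaryKeyGroupingComparator extends WritableComparator {
        protected PrimaryKeyGroupingComparator() {
          super(Text.class, true);
        }
        public int compare(WritableComparable a, WritableComparable b) {
          String ka = a.toString().split("#", 2)[0];
          String kb = b.toString().split("#", 2)[0];
          return ka.compareTo(kb);
        }
      }

      public static void configure(JobConf job) {
        // Full key order drives the sort of the intermediate data.
        job.setOutputKeyComparatorClass(Text.Comparator.class);
        // Grouping comparator decides which sorted keys share one reduce() call.
        job.setOutputValueGroupingComparator(PrimaryKeyGroupingComparator.class);
        // Shown explicitly; a real secondary sort would partition on the
        // primary part of the key only.
        job.setPartitionerClass(HashPartitioner.class);
      }
    }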
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. Typically the combiner is same as the + the {@link Reducer} for the job i.e. {@link #getReducerClass()}. + + @return the user-defined combiner class used to combine map-outputs.]]> + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. + +

    The combiner is a task-level aggregation operation which, in some cases, + helps to cut down the amount of data transferred from the {@link Mapper} to + the {@link Reducer}, leading to better performance.

    + +

+ Typically the combiner is the same as the Reducer for the
+ job, i.e. {@link #setReducerClass(Class)}.

    + + @param theClass the user-defined combiner class used to combine + map-outputs.]]> +
    +
    + + + + + + + + + + + true. + + @return true if speculative execution be used for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on, else false.]]> + + + + + true. + + @return true if speculative execution be + used for this job for map tasks, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for map tasks, + else false.]]> + + + + + true. + + @return true if speculative execution be used + for reduce tasks for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for reduce tasks, + else false.]]> + + + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + Note: This is only a hint to the framework. The actual + number of spawned map tasks depends on the number of {@link InputSplit}s + generated by the job's {@link InputFormat#getSplits(JobConf, int)}. + + A custom {@link InputFormat} is typically used to accurately control + the number of map tasks for the job.

    + +

    How many maps?

    + +

    The number of maps is usually driven by the total size of the inputs + i.e. total number of blocks of the input files.

    + +

+ The right level of parallelism for maps seems to be around 10-100 maps
+ per-node, although it has been set up to 300 or so for very cpu-light map
+ tasks. Task setup takes a while, so it is best if the maps take at least a
+ minute to execute.

    + +

    The default behavior of file-based {@link InputFormat}s is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of input files. However, the {@link FileSystem} blocksize of the + input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

    Thus, if you expect 10TB of input data and have a blocksize of 128MB, + you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is + used to set it even higher.

    + + @param n the number of map tasks for this job. + @see InputFormat#getSplits(JobConf, int) + @see FileInputFormat + @see FileSystem#getDefaultBlockSize() + @see FileStatus#getBlockSize()]]> +
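A brief hedged sketch of the knobs mentioned above; the numbers are illustrative, and the actual map count still follows from getSplits().

    import org.apache.hadoop.mapred.JobConf;

    public class MapCountTuning {
      public static void tune(JobConf job) {
        // Hint only: the real number of maps follows from InputFormat.getSplits().
        job.setNumMapTasks(500);

        // Raise the lower bound on split size to get fewer, larger maps,
        // e.g. 256 MB per split (illustrative value).
        job.setLong("mapred.min.split.size", 256L * 1024 * 1024);

        // With 10 TB of input and a 128 MB block size, the default split
        // logic yields roughly 10 TB / 128 MB = ~82,000 maps.
      }
    }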
    +
    + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + How many reduces? + +

    The right number of reduces seems to be 0.95 or + 1.75 multiplied by (<no. of nodes> * + + mapred.tasktracker.reduce.tasks.maximum). +

    + +

+ With 0.95 all of the reduces can launch immediately and
+ start transferring map outputs as the maps finish. With 1.75
+ the faster nodes will finish their first round of reduces and launch a
+ second wave of reduces, doing a much better job of load balancing.

    + +

    Increasing the number of reduces increases the framework overhead, but + increases load balancing and lowers the cost of failures.

    + +

    The scaling factors above are slightly less than whole numbers to + reserve a few reduce slots in the framework for speculative-tasks, failures + etc.

    + +

    Reducer NONE

    + +

    It is legal to set the number of reduce-tasks to zero.

    + +

    In this case the output of the map-tasks directly go to distributed + file-system, to the path set by + {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the + framework doesn't sort the map-outputs before writing it out to HDFS.

    + + @param n the number of reduce tasks for this job.]]> +
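A hedged sketch applying the 0.95 / 1.75 heuristics above; the cluster size and the default slot count are assumptions.

    import org.apache.hadoop.mapred.JobConf;

    public class ReduceCountTuning {
      public static void tune(JobConf job) {
        int nodes = 40;   // assumed cluster size

        // Slots per node come from mapred.tasktracker.reduce.tasks.maximum.
        int reduceSlotsPerNode =
            job.getInt("mapred.tasktracker.reduce.tasks.maximum", 2);

        // 0.95 * total reduce slots: all reduces start as soon as maps finish.
        int singleWave = (int) (0.95 * nodes * reduceSlotsPerNode);
        // 1.75 * total reduce slots: a second wave improves load balancing.
        int doubleWave = (int) (1.75 * nodes * reduceSlotsPerNode);

        job.setNumReduceTasks(singleWave);   // or doubleWave
      }
    }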
    +
    + + + mapred.map.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per map task.]]> + + + + + + + + + + + mapred.reduce.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per reduce task.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + noFailures, the + tasktracker is blacklisted for this job. + + @param noFailures maximum no. of failures of a given job per tasktracker.]]> + + + + + blacklisted for this job. + + @return the maximum no. of failures of a given job per tasktracker.]]> + + + + + failed. + + Defaults to zero, i.e. any failed map-task results in + the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + failed. + + Defaults to zero, i.e. any failed reduce-task results + in the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The debug script can aid debugging of failed map tasks. The script is + given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the map failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setMapDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param mDbgScript the script name]]> +
    +
    + + + + + + + + + The debug script can aid debugging of failed reduce tasks. The script + is given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

+ The debug command, run on the node where the reduce failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script file needs to be symlinked

    + +

    Here is an example on how to submit a script +

    + job.setReduceDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param rDbgScript the script name]]> +
    +
    + + + + + + + + null if it hasn't + been set. + @see #setJobEndNotificationURI(String)]]> + + + + + + The uri can contain 2 special parameters: $jobId and + $jobStatus. Those, if present, are replaced by the job's + identifier and completion-status respectively.

    + +

    This is typically used by application-writers to implement chaining of + Map-Reduce jobs in an asynchronous manner.

    + + @param uri the job end notification uri + @see JobStatus + @see Job Completion and Chaining]]> +
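A hedged one-liner showing the notification URI just described; the host and path are placeholders.

    import org.apache.hadoop.mapred.JobConf;

    public class NotificationConfig {
      public static void configure(JobConf job) {
        // $jobId and $jobStatus are substituted by the framework when the job ends.
        job.setJobEndNotificationURI(
            "http://workflow.example.com/notify?id=$jobId&status=$jobStatus"); // placeholder URL
      }
    }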
    +
    + + + + When a job starts, a shared directory is created at location + + ${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ . + This directory is exposed to the users through + job.local.dir . + So, the tasks can use this space + as scratch space and share files among them.

    + This value is available as System property also. + + @return The localized job specific shared directory]]> +
    +
    + + JobConf is the primary interface for a user to describe a + map-reduce job to the Hadoop framework for execution. The framework tries to + faithfully execute the job as-is described by JobConf, however: +
      +
+   1. Some configuration parameters might have been marked as
+      final by administrators and hence cannot be altered.
+   2. While some job parameters are straight-forward to set
+      (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly
+      with the rest of the framework and/or job-configuration and are relatively
+      more complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}).

    + +

    JobConf typically specifies the {@link Mapper}, combiner + (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and + {@link OutputFormat} implementations to be used etc. + +

+ Optionally JobConf is used to specify other advanced facets
+ of the job such as the Comparators to be used, files to be put in
+ the {@link DistributedCache}, whether or not intermediate and/or job outputs
+ are to be compressed (and how), debuggability via user-provided scripts
+ ({@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)})
+ for doing post-processing on task logs, task's stdout, stderr, syslog, etc.

    + +

    Here is an example on how to configure a job via JobConf:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     FileInputFormat.setInputPaths(job, new Path("in"));
    +     FileOutputFormat.setOutputPath(job, new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setCombinerClass(MyJob.MyReducer.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +     
    +     job.setInputFormat(SequenceFileInputFormat.class);
    +     job.setOutputFormat(SequenceFileOutputFormat.class);
    + 

    + + @see JobClient + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + any job + run on the jobtracker started at 200707121733, we would use : +
     
    + JobID.getTaskIDsPattern("200707121733", null);
    + 
    + which will return : +
     "job_200707121733_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @return a regex pattern matching JobIDs]]> +
    +
    + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID + @see JobTracker#getNewJobId() + @see JobTracker#getStartTime()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -archives + -files inputjar args]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + zero. + + @param conf configuration for the JobTracker. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.file.buffer.size specified in the given + Configuration. + @param in input stream + @param conf configuration + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Output pairs need not be of the same types as input pairs. A given + input pair may map to zero or many output pairs. Output pairs are + collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the input key. + @param value the input value. + @param output collects mapped keys and values. + @param reporter facility to report progress.]]> +
    + + + Maps are the individual tasks which transform input records into a + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link JobConf} for the + job via the {@link JobConfigurable#configure(JobConf)} and initialize + themselves. Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    The framework then calls + {@link #map(Object, Object, OutputCollector, Reporter)} + for each key/value pair in the InputSplit for that task.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the grouping by specifying + a Comparator via + {@link JobConf#setOutputKeyComparatorClass(Class)}.

    + +

    The grouped Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    The intermediate, grouped outputs are always stored in + {@link SequenceFile}s. Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the JobConf.

    + +

    If the job has + zero + reduces then the output of the Mapper is directly written + to the {@link FileSystem} without grouping by keys.

    + +

    Example:

    +

    +     public class MyMapper<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Mapper<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +       
    +       private String mapTaskId;
    +       private String inputFile;
    +       private int noRecords = 0;
    +       
    +       public void configure(JobConf job) {
    +         mapTaskId = job.get("mapred.task.id");
    +         inputFile = job.get("mapred.input.file");
    +       }
    +       
    +       public void map(K key, V val,
    +                       OutputCollector<K, V> output, Reporter reporter)
    +       throws IOException {
    +         // Process the <key, value> pair (assume this takes a while)
    +         // ...
    +         // ...
    +         
    +         // Let the framework know that we are alive, and kicking!
    +         // reporter.progress();
    +         
    +         // Process some more
    +         // ...
    +         // ...
    +         
    +         // Increment the no. of <key, value> pairs processed
    +         ++noRecords;
    +
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +        
    +         // Every 100 records update application-level status
    +         if ((noRecords%100) == 0) {
    +           reporter.setStatus(mapTaskId + " processed " + noRecords + 
    +                              " from input-file: " + inputFile); 
    +         }
    +         
    +         // Output the result
    +         output.collect(key, val);
    +       }
    +     }
    + 

    + +

    Applications may write a custom {@link MapRunnable} to exert greater + control on map processing e.g. multi-threaded Mappers etc.

    + + @see JobConf + @see InputFormat + @see Partitioner + @see Reducer + @see MapReduceBase + @see MapRunnable + @see SequenceFile]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + Provides default no-op implementations for a few methods, most non-trivial + applications need to override some of them.

    ]]> +
    +
    + + + + + + + + + + + <key, value> pairs. + +

    Mapping of input records to output records is complete when this method + returns.

+ @param input the {@link RecordReader} to read the input records.
+ @param output the {@link OutputCollector} to collect the output records.
+ @param reporter {@link Reporter} to report progress, status-updates etc.
+ @throws IOException]]>
    +
    + + Custom implementations of MapRunnable can exert greater + control on map processing e.g. multi-threaded, asynchronous mappers etc.

    + + @see Mapper]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nearly + equal content length.
    + Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} + to construct RecordReader's for MultiFileSplit's. + @see MultiFileSplit]]> +
    +
    + + + + + + + + + + + + + + + + + th Path]]> + + + + + + + + + + + th Path]]> + + + + + + + + + + + + + + + + + + + + + + + MultiFileSplit can be used to implement {@link RecordReader}'s, with + reading one record per file. + @see FileSplit + @see MultiFileInputFormat]]> + + + + + + + + + + + + + + + <key, value> pairs output by {@link Mapper}s + and {@link Reducer}s. + +

    OutputCollector is the generalization of the facility + provided by the Map-Reduce framework to collect data output by either the + Mapper or the Reducer i.e. intermediate outputs + or the output of the job.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when it is + a job is submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + @param ignored + @param job job configuration. + @throws IOException when output should not be attempted]]> +
    +
    + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
+   1. Validate the output-specification of the job, e.g. check that the
+      output directory doesn't already exist.
+   2. Provide the {@link RecordWriter} implementation to be used to write out
+      the output files of the job. Output files are stored in a
+      {@link FileSystem}.
    + + @see RecordWriter + @see JobConf]]> +
    +
    + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Typically a hash function on a all or a subset of the key.

    + + @param key the key to be paritioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + + @see Reducer]]> +
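A short hedged sketch of the Partitioner contract just described, routing Text keys by a hash of the whole key; the class name and the IntWritable value type are assumptions.

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.Partitioner;

    public class HashKeyPartitioner implements Partitioner<Text, IntWritable> {

      public void configure(JobConf job) {
        // No configuration needed for this sketch.
      }

      // Route keys by the hash of the whole key; masking keeps the value non-negative.
      public int getPartition(Text key, IntWritable value, int numPartitions) {
        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
      }
    }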
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.0 to 1.0. + @throws IOException]]> + + + + RecordReader reads <key, value> pairs from an + {@link InputSplit}. + +

    RecordReader, typically, converts the byte-oriented view of + the input, provided by the InputSplit, and presents a + record-oriented view for the {@link Mapper} & {@link Reducer} tasks for + processing. It thus assumes the responsibility of processing record + boundaries and presenting the tasks with keys and values.

    + + @see InputSplit + @see InputFormat]]> +
    +
    + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param reporter facility to report progress. + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + Reduces values for a given key. + +

    The framework calls this method for each + <key, (list of values)> pair in the grouped inputs. + Output values must be of the same type as input values. Input keys must + not be altered. The framework will reuse the key and value objects + that are passed into the reduce, therefore the application should clone + the objects they want to keep a copy of. In many cases, all values are + combined into zero or one value. +

    + +

    Output pairs are collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the key. + @param values the list of values to reduce. + @param output to collect keys and combined values. + @param reporter facility to report progress.]]> +
    + + + The number of Reducers for the job is set by the user via + {@link JobConf#setNumReduceTasks(int)}. Reducer implementations + can access the {@link JobConf} for the job via the + {@link JobConfigurable#configure(JobConf)} method and initialize themselves. + Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    Reducer has 3 primary phases:

    +
      +
    1. + +

      Shuffle

      + +

+   Reducer is input the grouped output of a {@link Mapper}.
+   In this phase the framework, for each Reducer, fetches the
+   relevant partition of the output of all the Mappers, via HTTP.

      +
    2. + +
    3. +

      Sort

      + +

      The framework groups Reducer inputs by keys + (since different Mappers may have output the same key) in this + stage.

      + +

      The shuffle and sort phases occur simultaneously i.e. while outputs are + being fetched they are merged.

      + +
      SecondarySort
      + +

+ If equivalence rules for keys while grouping the intermediates are
+ different from those for grouping keys before reduction, then one may
+ specify a Comparator via
+ {@link JobConf#setOutputValueGroupingComparator(Class)}. Since
+ {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to
+ control how intermediate keys are grouped, these can be used in conjunction
+ to simulate secondary sort on values.

      + + + For example, say that you want to find duplicate web pages and tag them + all with the url of the "best" known example. You would set up the job + like: +
        +
+   • Map Input Key: url
+   • Map Input Value: document
+   • Map Output Key: document checksum, url pagerank
+   • Map Output Value: url
+   • Partitioner: by checksum
+   • OutputKeyComparator: by checksum and then decreasing pagerank
+   • OutputValueGroupingComparator: by checksum
      +
    4. + +
    5. +

      Reduce

      + +

      In this phase the + {@link #reduce(Object, Iterator, OutputCollector, Reporter)} + method is called for each <key, (list of values)> pair in + the grouped inputs.

      +

      The output of the reduce task is typically written to the + {@link FileSystem} via + {@link OutputCollector#collect(Object, Object)}.

      +
    6. +
    + +

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

    +     public class MyReducer<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Reducer<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +        
    +       private String reduceTaskId;
    +       private int noKeys = 0;
    +       
    +       public void configure(JobConf job) {
    +         reduceTaskId = job.get("mapred.task.id");
    +       }
    +       
    +       public void reduce(K key, Iterator<V> values,
    +                          OutputCollector<K, V> output, 
    +                          Reporter reporter)
    +       throws IOException {
    +       
    +         // Process
    +         int noValues = 0;
    +         while (values.hasNext()) {
    +           V value = values.next();
    +           
    +           // Increment the no. of values for this key
    +           ++noValues;
    +           
    +           // Process the <key, value> pair (assume this takes a while)
    +           // ...
    +           // ...
    +           
    +           // Let the framework know that we are alive, and kicking!
    +           if ((noValues%10) == 0) {
    +             reporter.progress();
    +           }
    +         
    +           // Process some more
    +           // ...
    +           // ...
    +           
    +           // Output the <key, value> 
    +           output.collect(key, value);
    +         }
    +         
    +         // Increment the no. of <key, list of values> pairs processed
    +         ++noKeys;
    +         
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +         
    +         // Every 100 keys update application-level status
    +         if ((noKeys%100) == 0) {
    +           reporter.setStatus(reduceTaskId + " processed " + noKeys);
    +         }
    +       }
    +     }
    + 

    + + @see Mapper + @see Partitioner + @see Reporter + @see MapReduceBase]]> +
    +
    + + + + + + + + + + + + + + + Enum. + @param amount A non-negative amount by which the counter is to + be incremented.]]> + + + + + + + + + + + + + + InputSplit that the map is reading from. + @throws UnsupportedOperationException if called outside a mapper]]> + + + + + + + + + {@link Mapper} and {@link Reducer} can use the Reporter + provided to report progress or just indicate that they are alive. In + scenarios where the application takes an insignificant amount of time to + process individual key/value pairs, this is crucial since the framework + might assume that the task has timed-out and kill that task. + +

    Applications can also update {@link Counters} via the provided + Reporter .

    + + @see Progressable + @see Counters]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RunningJob is the user-interface to query for details on a + running Map-Reduce job. + +

    Clients can get hold of RunningJob via the {@link JobClient} + and then query the running-job for details such as name, configuration, + progress etc.

    + + @see JobClient]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This allows the user to specify the key class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + This allows the user to specify the value class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f. The filtering criteria is + MD5(key) % f == 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + f using + the criteria record# % f == 0. + For example, if the frequency is 10, one out of 10 records is returned.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . + @param name The name of the server + @param port The port to use on the server + @param findPort whether the server should start at the given port and + increment by 1 until it finds a free port.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + points to the log directory + "/static/" -> points to common static files (src/webapps/static) + "/" -> the jsp server code from (src/webapps/)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all task attempt IDs + of any jobtracker, in any job, of the first + map task, we would use : +
     
    + TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
    + 
    + which will return : +
     "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @param attemptId the task attempt number, or null + @return a regex pattern matching TaskAttemptIDs]]> +
    +
    + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the first map task + of any jobtracker, of any job, we would use : +

     
    + TaskID.getTaskIDsPattern(null, null, true, 1);
    + 
    + which will return : +
     "task_[^_]*_[0-9]*_m_000001*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @return a regex pattern matching TaskIDs]]> +
    + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.log.dir.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the Job was added.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([,]*) + func ::= tbl(,"") + class ::= @see java.lang.Class#forName(java.lang.String) + path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) + } + Reads expression from the mapred.join.expr property and + user-supplied join types from mapred.join.define.<ident> + types. Paths supplied to tbl are given as input paths to the + InputFormat class listed. + @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,

    ) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + mapred.join.define.<ident> to a classname. In the expression + mapred.join.expr, the identifier will be assumed to be a + ComposableRecordReader. + mapred.join.keycomparator can be a classname used to compare keys + in the join. + @see JoinRecordReader + @see MultiFilterRecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + capacity children to position + id in the parent reader. + The id of a root CompositeRecordReader is -1 by convention, but relying + on this is not recommended.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + override(S1,S2,S3) will prefer values + from S3 over S2, and values from S2 over S1 for all keys + emitted from all sources.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [,,...,]]]> + + + + + + + out. + TupleWritable format: + {@code + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used instead of the default implementation, + @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + bound in order to improve throughput. +

    + Map implementations using this MapRunnable must be thread-safe. +

+ The Map-Reduce job has to be configured to use this MapRunnable class (using
+ the JobConf.setMapRunnerClass method), and
+ the number of threads the thread-pool can use is set with the
+ mapred.map.multithreadedrunner.threads property; its default
+ value is 10 threads.
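A hedged sketch of enabling the multithreaded runner described above; the thread count is an arbitrary example.

    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.lib.MultithreadedMapRunner;

    public class MultithreadedConfig {
      public static void configure(JobConf job) {
        // The Mapper used with this runner must be thread-safe.
        job.setMapRunnerClass(MultithreadedMapRunner.class);
        // Defaults to 10 threads if the property is not set (per the text above).
        job.setInt("mapred.map.multithreadedrunner.threads", 20); // example value
      }
    }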

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + pairs. Uses + {@link StringTokenizer} to break text into tokens.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + generateKeyValPairs(Object key, Object value); public void + configure(JobConfjob); } + + The package also provides a base class, ValueAggregatorBaseDescriptor, + implementing the above interface. The user can extend the base class and + implement generateKeyValPairs accordingly. + + The primary work of generateKeyValPairs is to emit one or more key/value + pairs based on the input key/value pair. The key in an output key/value pair + encode two pieces of information: aggregation type and aggregation id. The + value will be aggregated onto the aggregation id according the aggregation + type. + + This class offers a function to generate a map/reduce job using Aggregate + framework. The function takes the following parameters: input directory spec + input format (text or sequence file) output directory a file specifying the + user plugin class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When constructing the instance, if the factory property + contextName.class exists, + its value is taken to be the name of the class to instantiate. Otherwise, + the default is to create an instance of + org.apache.hadoop.metrics.spi.NullContext, which is a + dummy "no-op" context which will cause all metric data to be discarded. + + @param contextName the name of the context + @return the named MetricsContext]]> + + + + + + + + + + + + + + When the instance is constructed, this method checks if the file + hadoop-metrics.properties exists on the class path. If it + exists, it must be in the format defined by java.util.Properties, and all + the properties in the file are set as attributes on the newly created + ContextFactory instance. 
+ + @return the singleton ContextFactory instance]]> + + + + getFactory() method.]]> + + + + + + + + + + + + + + + + + + + startMonitoring() again after calling + this. + @see #close()]]> + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A record name identifies the kind of data to be reported. For example, a + program reporting statistics relating to the disks on a computer might use + a record name "diskStats".

    + + A record has zero or more tags. A tag has a name and a value. To + continue the example, the "diskStats" record might use a tag named + "diskName" to identify a particular disk. Sometimes it is useful to have + more than one tag, so there might also be a "diskType" with value "ide" or + "scsi" or whatever.

    + + A record also has zero or more metrics. These are the named + values that are to be reported to the metrics system. In the "diskStats" + example, possible metric names would be "diskPercentFull", "diskPercentBusy", + "kbReadPerSecond", etc.

    + + The general procedure for using a MetricsRecord is to fill in its tag and + metric values, and then call update() to pass the record to the + client library. + Metric data is not immediately sent to the metrics system + each time that update() is called. + An internal table is maintained, identified by the record name. This + table has columns + corresponding to the tag and the metric names, and rows + corresponding to each unique set of tag values. An update + either modifies an existing row in the table, or adds a new row with a set of + tag values that are different from all the other rows. Note that if there + are no tags, then there can be at most one row in the table.

    + + Once a row is added to the table, its data will be sent to the metrics system + on every timer period, whether or not it has been updated since the previous + timer period. If this is inappropriate, for example if metrics were being + reported by some transient object in an application, the remove() + method can be used to remove the row and thus stop the data from being + sent.

    + + Note that the update() method is atomic. This means that it is + safe for different threads to be updating the same metric. More precisely, + it is OK for different threads to call update() on MetricsRecord instances + with the same set of tag names and tag values. Different threads should + not use the same MetricsRecord instance at the same time.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MetricsContext.registerUpdater().]]> + + + + + + + + + + + + + + + + + + + + + + + + + fileName attribute, + if specified. Otherwise the data will be written to standard + output.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is configured by setting ContextFactory attributes which in turn + are usually configured through a properties file. All the attributes are + prefixed by the contextName. For example, the properties file might contain: +

    + myContextName.fileName=/tmp/metrics.log
    + myContextName.period=5
    + 
    ]]> +
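The record/tag/metric flow described above can be summarized in a short sketch (illustrative only; the context name "myContextName" and the "diskStats" record, tag and metric names are taken from the examples in this documentation, and exception handling is collapsed into a throws clause):

import org.apache.hadoop.metrics.ContextFactory;
import org.apache.hadoop.metrics.MetricsContext;
import org.apache.hadoop.metrics.MetricsRecord;

public class DiskStatsReporter {
  public static void main(String[] args) throws Exception {
    // Obtain the singleton factory and the named context, configured via
    // myContextName.fileName / myContextName.period as shown above.
    ContextFactory factory = ContextFactory.getFactory();
    MetricsContext context = factory.getContext("myContextName");
    context.startMonitoring();

    // Fill in tags and metrics, then push the row with update().
    MetricsRecord diskStats = context.createRecord("diskStats");
    diskStats.setTag("diskName", "sda");         // identifies the row
    diskStats.setMetric("diskPercentFull", 42);  // value to report
    diskStats.update();

    // The row is re-sent every timer period until remove() is called.
  }
}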
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + contextName.tableName. The returned map consists of + those attributes with the contextName and tableName stripped off.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class implements the internal table of metric data, and the timer + on which data is to be sent to the metrics system. Subclasses must + override the abstract emitRecord method in order to transmit + the data.

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + update + and remove().]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + ,name=" + Where the and are the supplied parameters + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.rpc.socket.factory.class.<ClassName>. When no + such parameter exists then fall back on the default socket factory as + configured by hadoop.rpc.socket.factory.class.default. If + this default socket factory is not configured, then fall back on the JVM + default socket factory. + + @param conf the configuration + @param clazz the class (usually a {@link VersionedProtocol}) + @return a socket factory]]> + + + + + + hadoop.rpc.socket.factory.default + + @param conf the configuration + @return the default socket factory as specified in the configuration or + the JVM default socket factory if the configuration does not + contain a default socket factory property.]]> + + + + + + + + + + + + + : + ://:/]]> + + + + + + + + : + ://:/]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + From documentation for {@link #getInputStream(Socket, long)}:
+ Returns InputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketInputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getInputStream()} is returned. In the latter + case, the timeout argument is ignored and the timeout set with + {@link Socket#setSoTimeout(int)} applies for reads.

+ + Any socket created using socket factories returned by {@link NetUtils} + must use this interface instead of {@link Socket#getInputStream()}. + + @see #getInputStream(Socket, long) + + @param socket + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + + +
+ + Any socket created using socket factories returned by {@link NetUtils} + must use this interface instead of {@link Socket#getInputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. Zero + for waiting as long as necessary. + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + +
    + + From documentation for {@link #getOutputStream(Socket, long)} :
+ Returns OutputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketOutputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getOutputStream()} is returned. In the latter + case, the timeout argument is ignored and the write will wait until + data is available.

+ + Any socket created using socket factories returned by {@link NetUtils} + must use this interface instead of {@link Socket#getOutputStream()}. + + @see #getOutputStream(Socket, long) + + @param socket + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    + + + + + +
+ + Any socket created using socket factories returned by {@link NetUtils} + must use this interface instead of {@link Socket#getOutputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. Zero + for waiting as long as necessary. + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    +
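As a minimal sketch of the rule above, a socket obtained from a Hadoop-configured socket factory is wrapped with NetUtils.getInputStream / NetUtils.getOutputStream instead of the raw java.net.Socket streams (the host name, port and 60 second timeout below are made-up values):

import java.io.InputStream;
import java.io.OutputStream;
import java.net.Socket;
import javax.net.SocketFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetUtils;

public class SocketStreamExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Factory chosen via hadoop.rpc.socket.factory.class.default,
    // falling back to the JVM default socket factory.
    SocketFactory factory = NetUtils.getDefaultSocketFactory(conf);
    Socket socket = factory.createSocket("example.host", 8020); // hypothetical endpoint

    // Channel-backed sockets get real timeouts through these wrappers.
    InputStream in = NetUtils.getInputStream(socket, 60000);
    OutputStream out = NetUtils.getOutputStream(socket, 60000);

    out.write(0);
    int firstByte = in.read();
    System.out.println("read byte: " + firstByte);
    socket.close();
  }
}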
    + + + + + + + + + + + + + + + + + + + + + node + + @param node + a node + @return true if node is already in the tree; false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + scope + if scope starts with ~, choose one from the all nodes except for the + ones in scope; otherwise, choose one from scope + @param scope range of nodes from which a node will be choosen + @return the choosen node]]> + + + + + + + scope but not in excludedNodes + if scope starts with ~, return the number of nodes that are not + in scope and excludedNodes; + @param scope a path string that may start with ~ + @param excludedNodes a list of nodes + @return number of available nodes]]> + + + + + + + + + + + + reader + It linearly scans the array, if a local node is found, swap it with + the first element of the array. + If a local rack node is found, swap it with the first element following + the local node. + If neither local node or local rack node is found, put a random replica + location at postion 0. + It leaves the rest nodes untouched.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout in milliseconds. Must not be negative. + @throws IOException]]> +
    +
    + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + +
+ + Create a new output stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketOutputStream#SocketOutputStream(WritableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout in milliseconds. Must not be negative. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = getCount(). + @param newCapacity The new capacity in bytes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index idx = startVector(...); + while (!idx.done()) { + .... // read element of a vector + idx.incr(); + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This task takes the given record definition files and compiles them into + java or c++ + files. It is then up to the user to compile the generated files. + +

    The task requires the file or the nested fileset element to be + specified. Optional attributes are language (set the output + language, default is "java"), + destdir (name of the destination directory for generated java/c++ + code, default is ".") and failonerror (specifies error handling + behavior. default is true). +

    Usage

    +
    + <recordcc
    +       destdir="${basedir}/gensrc"
    +       language="java">
+   <fileset include="**/*.jr" />
    + </recordcc>
    + 
    ]]> +
    +
    + +
    + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi as a comma separated string in + conf as a property attr + + The String starts with the user name followed by the default group names, + and other group names. + + @param conf configuration + @param attr property name + @param ugi a UnixUserGroupInformation]]> + + + + + + + + conf + + The object is expected to store with the property name attr + as a comma separated string that starts + with the user name followed by group names. + If the property name is not defined, return null. + It's assumed that there is only one UGI per user. If this user already + has a UGI in the ugi map, return the ugi in the map. + Otherwise, construct a UGI from the configuration, store it in the + ugi map and return it. + + @param conf configuration + @param attr property name + @return a UnixUGI + @throws LoginException if the stored string is ill-formatted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This tool supports archiving and anaylzing (sort/grep) of log-files. + It takes as input + a) Input uri which will serve uris of the logs to be archived. + b) Output directory (not mandatory). + b) Directory on dfs to archive the logs. + c) The sort/grep patterns for analyzing the files and separator for boundaries. + Usage: + Logalyzer -archive -archiveDir -analysis -logs -grep -sort -separator +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GenericOptionsParser to parse only the generic Hadoop + arguments. + + The array of string arguments other than the generic arguments can be + obtained by {@link #getRemainingArgs()}. + + @param conf the Configuration to modify. + @param args command-line arguments.]]> + + + + + GenericOptionsParser to parse given options as well + as generic Hadoop options. + + The resulting CommandLine object can be obtained by + {@link #getCommandLine()}. + + @param conf the configuration to modify + @param options options built by the caller + @param args User-specified arguments]]> + + + + + Strings containing the un-parsed arguments + or empty array if commandLine was not defined.]]> + + + + + CommandLine object + to process the parsed arguments. + + Note: If the object is created with + {@link #GenericOptionsParser(Configuration, String[])}, then returned + object will only contain parsed generic options. + + @return CommandLine representing list of arguments + parsed against Options descriptor.]]> + + + + + + + + + + GenericOptionsParser is a utility to parse command line + arguments generic to the Hadoop framework. + + GenericOptionsParser recognizes several standarad command + line arguments, enabling applications to easily specify a namenode, a + jobtracker, additional configuration resources etc. + +

    Generic Options

    + +

    The supported generic options are:

    +

    +     -conf <configuration file>     specify a configuration file
    +     -D <property=value>            use value for given property
    +     -fs <local|namenode:port>      specify a namenode
    +     -jt <local|jobtracker:port>    specify a job tracker
    +     -files <comma separated list of files>    specify comma separated
    +                            files to be copied to the map reduce cluster
    +     -libjars <comma separated list of jars>   specify comma separated
    +                            jar files to include in the classpath.
    +     -archives <comma separated list of archives>    specify comma
    +             separated archives to be unarchived on the compute machines.
    +
    + 

    + +

    The general command line syntax is:

    +

    + bin/hadoop command [genericOptions] [commandOptions]
    + 

    + +

    Generic command line arguments might modify + Configuration objects, given to constructors.

    + +

    The functionality is implemented using Commons CLI.

    + +

    Examples:

    +

    + $ bin/hadoop dfs -fs darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    + 
    + $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    +     
    + $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
    + list /data directory in dfs with conf specified in hadoop-site.xml
    +     
    + $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt local -submit job.xml
    + submit a job to local runner
    + 
    + $ bin/hadoop jar -libjars testlib.jar 
    + -archives test.tgz -files file.txt inputjar args
    + job submission with libjars, files and archives
    + 

    + + @see Tool + @see ToolRunner]]> +
    +
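A minimal sketch of the usage described above, relying only on the constructor and getRemainingArgs() documented here; printing the leftover arguments merely stands in for application-specific handling:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class MyDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Consumes -D, -fs, -jt, -files, -libjars and -archives, updating conf.
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);

    // Whatever was not a generic option is left for the application.
    for (String arg : parser.getRemainingArgs()) {
      System.out.println("application argument: " + arg);
    }
  }
}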
    + + + + + + + + + Class<T>) of the + argument of type T. + @param The type of the argument + @param t the object to get it class + @return Class<T>]]> + + + + + + + List<T> to a an array of + T[]. + @param c the Class object of the items in the list + @param list the list to convert]]> + + + + + + List<T> to a an array of + T[]. + @param list the list to convert + @throws ArrayIndexOutOfBoundsException if the list is empty. + Use {@link #toArray(Class, List)} if the list may be empty.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-hadoop is loaded, + else false]]> + + + + + + true if native hadoop libraries, if present, can be + used for this job; false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { pq.top().change(); pq.adjustTop(); } + instead of
    +  { o = pq.pop(); o.change(); pq.push(o); }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take an insignificant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occured and time-out the operation.

    ]]> +
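A small sketch of the reporting pattern described above; doLongRunningStep() is a hypothetical placeholder for application work:

import org.apache.hadoop.util.Progressable;

public class ProgressExample {
  // Runs a long loop while keeping the framework informed that work continues.
  static void process(Progressable progress) {
    for (int step = 0; step < 1000; step++) {
      doLongRunningStep(step);  // hypothetical unit of work
      progress.progress();      // report liveness to avoid a time-out
    }
  }

  static void doLongRunningStep(int step) {
    // ... application-specific work ...
  }
}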
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop Pipes + or Hadoop Streaming. + + It also checks to ensure that we are running on a *nix platform else + (e.g. in Cygwin/Windows) it returns null. + @param job job configuration + @return a String[] with the ulimit command arguments or + null if we are running on a non *nix platform or + if the limit is unspecified.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell can be used to run unix commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShellCommandExecutorshould be used in cases where the output + of the command needs no explicit parsing and where the command, working + directory and the environment remains unchanged. The output of the command + is stored as-is and is expected to be small.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ArrayList of string values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the char to be escaped + @return an escaped string]]> + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the escaped char + @return an unescaped string]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyApp.MyMapper.class);
    +         job.setReducerClass(MyApp.MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
+         JobClient.runJob(job);
+         return 0;
    +       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
+         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> +
    +
    + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
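For completeness, the run(Tool, String[]) overload described above (equivalent to run(tool.getConf(), tool, args)) can be driven as in this minimal sketch; MyApp refers to the Tool example earlier in this file and is not defined here:

import org.apache.hadoop.util.ToolRunner;

public class MyAppDriver {
  public static void main(String[] args) throws Exception {
    // Generic options are parsed and applied to MyApp's own Configuration.
    int exitCode = ToolRunner.run(new MyApp(), args);
    System.exit(exitCode);
  }
}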
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    diff --git a/aarch64/share/hadoop/common/jdiff/hadoop_0.19.0.xml b/aarch64/share/hadoop/common/jdiff/hadoop_0.19.0.xml new file mode 100644 index 0000000..557ac3c --- /dev/null +++ b/aarch64/share/hadoop/common/jdiff/hadoop_0.19.0.xml @@ -0,0 +1,43972 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param in InputStream to deserialize the object from.]]> + + + + + + + + + + + name property, null if + no such property exists. + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + name property, without doing + variable expansion. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + + value of the name property. + + @param name property name. + @param value property value.]]> + + + + + + + name property. If no such property + exists, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, or if the specified value is not a valid + int, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as an int, + or defaultValue.]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property is specified, or if the specified value is not a valid + long, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property is specified, or if the specified value is not a valid + float, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a float, + or defaultValue.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

    + This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

      +
1. hadoop-default.xml: Read-only defaults for hadoop.
2. hadoop-site.xml: Site-specific configuration for a given hadoop installation.
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    +  <property>
    +    <name>dfs.client.buffer.dir</name>
    +    <value>/tmp/hadoop/dfs/client</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + hadoop-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
1. Other properties defined in this Configuration; and, if a name is undefined here,
2. Properties in {@link System#getProperties()}.
    + +

    For example, if a configuration resource contains the following property + definitions: +

    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    + + When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name.]]> +
    +
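A minimal sketch of the resource loading, typed accessors and variable expansion described above (the resource names and property names are hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class ConfigurationExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Additional resources are loaded after the default ones, in this order.
    conf.addResource("my-site.xml");                           // from the classpath
    conf.addResource(new Path("/etc/hadoop/extra-site.xml"));  // from the local filesystem

    // Variable expansion: ${basedir} is resolved when tempdir is read.
    conf.set("basedir", "/user/alice");
    conf.set("tempdir", "${basedir}/tmp");
    System.out.println(conf.get("tempdir"));  // prints /user/alice/tmp

    // Typed accessors fall back to the supplied defaults.
    int retries = conf.getInt("my.retries", 3);
    boolean verbose = conf.getBoolean("my.verbose", false);
    System.out.println(retries + " " + verbose);
  }
}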
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DistributedCache is a facility provided by the Map-Reduce + framework to cache files (text, archives, jars etc.) needed by applications. +

    + +

    Applications specify the files, via urls (hdfs:// or http://) to be cached + via the {@link org.apache.hadoop.mapred.JobConf}. + The DistributedCache assumes that the + files specified via hdfs:// urls are already present on the + {@link FileSystem} at the path specified by the url.

    + +

    The framework will copy the necessary files on to the slave node before + any tasks for the job are executed on that node. Its efficiency stems from + the fact that the files are only copied once per job and the ability to + cache archives which are un-archived on the slaves.

    + +

    DistributedCache can be used to distribute simple, read-only + data/text files and/or more complex types such as archives, jars etc. + Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. + Jars may be optionally added to the classpath of the tasks, a rudimentary + software distribution mechanism. Files have execution permissions. + Optionally users can also direct it to symlink the distributed cache file(s) + into the working directory of the task.

    + +

    DistributedCache tracks modification timestamps of the cache + files. Clearly the cache files should not be modified by the application + or externally while the job is executing.

    + +

    Here is an illustrative example on how to use the + DistributedCache:

    +

    +     // Setting up the cache for the application
    +     
    +     1. Copy the requisite files to the FileSystem:
    +     
    +     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
    +     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
    +     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
    +     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
    +     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
    +     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
    +     
    +     2. Setup the application's JobConf:
    +     
    +     JobConf job = new JobConf();
    +     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
    +                                   job);
+     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
    +     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
    +     
    +     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
    +     or {@link org.apache.hadoop.mapred.Reducer}:
    +     
    +     public static class MapClass extends MapReduceBase  
    +     implements Mapper<K, V, K, V> {
    +     
    +       private Path[] localArchives;
    +       private Path[] localFiles;
    +       
    +       public void configure(JobConf job) {
    +         // Get the cached archives/files
    +         localArchives = DistributedCache.getLocalCacheArchives(job);
    +         localFiles = DistributedCache.getLocalCacheFiles(job);
    +       }
    +       
    +       public void map(K key, V value, 
    +                       OutputCollector<K, V> output, Reporter reporter) 
    +       throws IOException {
    +         // Use data from the cached archives/files here
    +         // ...
    +         // ...
    +         output.collect(k, v);
    +       }
    +     }
    +     
    + 

    + + @see org.apache.hadoop.mapred.JobConf + @see org.apache.hadoop.mapred.JobClient]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BufferedFSInputStream + with the specified buffer size, + and saves its argument, the input stream + in, for later use. An internal + buffer array of length size + is created and stored in buf. + + @param in the underlying input stream. + @param size the buffer size. + @exception IllegalArgumentException if size <= 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ' + @deprecated Consider using {@link GenericOptionsParser} instead.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

?              Matches any single character.
*              Matches zero or more characters.
[abc]          Matches a single character from character set {a,b,c}.
[a-b]          Matches a single character from the character range {a...b}. Note that character a must be lexicographically less than or equal to character b.
[^a]           Matches a single character that is not from character set or range {a}. Note that the ^ character must occur immediately to the right of the opening bracket.
\c             Removes (escapes) any special meaning of character c.
{ab,cd}        Matches a string from the string set {ab, cd}
{ab,c{de,fh}}  Matches a string from the string set {ab, cde, cfh}
+ + @param pathPattern a regular expression specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException]]> +
    +
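A short sketch of the pattern matching described above (the paths are hypothetical, and the null check covers the case where a non-glob path does not exist):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());

    // Matches e.g. /logs/2014-02-12/part-00000, sorted by name.
    FileStatus[] matches = fs.globStatus(new Path("/logs/2014-02-*/part-[0-9]*"));
    if (matches != null) {
      for (FileStatus status : matches) {
        System.out.println(status.getPath());
      }
    }
  }
}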
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object. The + Hadoop DFS is a multi-machine system that appears as a single + disk. It's useful because of its fault tolerance and potentially + very large capacity. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + buf at offset + and checksum into checksum. + The method is used for implementing read, therefore, it should be optimized + for sequential reading + @param pos chunkPos + @param buf desitination buffer + @param offset offset in buf at which to store data + @param len maximun number of bytes to read + @return number of bytes read]]> + + + + + + + + + + + + + + + + + -1 if the end of the + stream is reached. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + This method implements the general contract of the corresponding + {@link InputStream#read(byte[], int, int) read} method of + the {@link InputStream} class. As an additional + convenience, it attempts to read as many bytes as possible by repeatedly + invoking the read method of the underlying stream. This + iterated read continues until one of the following + conditions becomes true:

      + +
    • The specified number of bytes have been read, + +
    • The read method of the underlying stream returns + -1, indicating end-of-file. + +
    If the first read on the underlying stream returns + -1 to indicate end-of-file then this method returns + -1. Otherwise this method returns the number of bytes + actually read. + + @param b destination buffer. + @param off offset at which to start storing bytes. + @param len maximum number of bytes to read. + @return the number of bytes read, or -1 if the end of + the stream has been reached. + @exception IOException if an I/O error occurs. + ChecksumException if any checksum error occurs]]> +
    + + + + + + + + + + + + + + + + + + n bytes of data from the + input stream. + +

    This method may skip more bytes than are remaining in the backing + file. This produces no exception and the number of bytes skipped + may include some number of bytes that were beyond the EOF of the + backing file. Attempting to read from the stream after skipping past + the end will result in -1 indicating the end of the file. + +

    If n is negative, no bytes are skipped. + + @param n the number of bytes to be skipped. + @return the actual number of bytes skipped. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to skip to is corrupted]]> + + + + + + + This method may seek past the end of the file. + This produces no exception and an attempt to read from + the stream will result in -1 indicating the end of the file. + + @param pos the postion to seek to. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to seek to is corrupted]]> + + + + + + + + + + len bytes from + stm + + @param stm an input stream + @param buf destiniation buffer + @param offset offset at which to store data + @param len number of bytes to read + @return actual number of bytes read + @throws IOException if there is any IO error]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + len bytes from the specified byte array + starting at offset off and generate a checksum for + each data chunk. + +

    This method stores bytes from the given array into this + stream's buffer before it gets checksumed. The buffer gets checksumed + and flushed to the underlying output stream when all data + in a checksum chunk are in the buffer. If the buffer is empty and + requested length is at least as large as the size of next checksum chunk + size, this method will checksum and write the chunk directly + to the underlying output stream. Thus it avoids uneccessary data copy. + + @param b the data. + @param off the start offset in the data. + @param len the number of bytes to write. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trash feature. Files are moved to a user's trash + directory, a subdirectory of their home directory named ".Trash". Files are + initially moved to a current sub-directory of the trash directory. + Within that sub-directory their original path is preserved. Periodically + one may checkpoint the current trash and remove older checkpoints. (This + design permits trash management without enumeration of the full trash + content, without date support in the filesystem, and without clock + synchronization.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a tool for migrating data from an older to a newer version + of an S3 filesystem. +

    +

    + All files in the filesystem are migrated by re-writing the block metadata + - no datafiles are touched. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + Extracts AWS credentials from the filesystem URI or configuration. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A block-based {@link FileSystem} backed by + Amazon S3. +

    + @see NativeS3FileSystem]]> +
    +
    + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If f is a file, this method will make a single call to S3. + If f is a directory, this method will make a maximum of + (n / 1000) + 2 calls to S3, where n is the total number of + files and directories contained directly in f. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} for reading and writing files stored on + Amazon S3. + Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation + stores files on S3 in their + native form so they can be read by other S3 tools. +

    + @see org.apache.hadoop.fs.s3.S3FileSystem]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . + @param name The name of the server + @param port The port to use on the server + @param findPort whether the server should start at the given port and + increment by 1 until it finds a free port. + @param conf Configuration]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + points to the log directory + "/static/" -> points to common static files (src/webapps/static) + "/" -> the jsp server code from (src/webapps/)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nth value.]]> + + + + + + + + + + + + + + + + + + + + + nth value in the file.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataInputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + DataInputBuffer buffer = new DataInputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using DataInput methods ...
    + }
    + 
    ]]> +
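A minimal concrete sketch of the pattern above, assuming only the public org.apache.hadoop.io.DataInputBuffer API (reset(byte[], int) plus the inherited DataInput read methods); the payloads and the class name are illustrative:

    import org.apache.hadoop.io.DataInputBuffer;

    public class DataInputBufferSketch {
      public static void main(String[] args) throws Exception {
        // One buffer is re-used for every payload instead of allocating a new
        // DataInputStream/ByteArrayInputStream per read, which is the point above.
        DataInputBuffer buffer = new DataInputBuffer();
        byte[][] payloads = { {0, 0, 0, 1}, {0, 0, 0, 2} };  // each encodes one int
        for (byte[] data : payloads) {
          buffer.reset(data, data.length);       // point the buffer at the new data
          System.out.println(buffer.readInt());  // read via DataInput methods
        }
      }
    }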
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataOutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + DataOutputBuffer buffer = new DataOutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using DataOutput methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    ]]> +
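A matching sketch for the write side, again assuming only the public org.apache.hadoop.io.DataOutputBuffer API (reset(), getData(), getLength() and the inherited DataOutput write methods); the class name is illustrative:

    import org.apache.hadoop.io.DataOutputBuffer;

    public class DataOutputBufferSketch {
      public static void main(String[] args) throws Exception {
        DataOutputBuffer buffer = new DataOutputBuffer();
        for (int i = 0; i < 3; i++) {
          buffer.reset();                        // clear the buffer for re-use
          buffer.writeInt(i);                    // write via DataOutput methods
          byte[] data = buffer.getData();        // backing array, possibly longer than the data
          int dataLength = buffer.getLength();   // number of valid bytes in data
          System.out.println("record " + i + ": " + dataLength + " of " + data.length + " bytes");
        }
      }
    }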
    +
    + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

+ Compared with ObjectWritable, this class is much more efficient, + because ObjectWritable appends the class declaration as a String + to the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines + the classes that will be wrapped in GenericObject in the application. + Attention: the classes defined in the getTypes() method must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
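A small usage sketch for the class described above; it assumes GenericWritable's public set(Writable)/get() methods and wraps two stock Writable types (IntWritable and Text) instead of the hypothetical ClassType1..3. The raw Class[] mirrors the example above:

    import org.apache.hadoop.io.GenericWritable;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    public class GenericObject extends GenericWritable {
      // Raw Class[] as in the javadoc example above; compiles with an unchecked warning.
      private static final Class[] CLASSES = { IntWritable.class, Text.class };

      protected Class[] getTypes() {
        return CLASSES;
      }

      public static void main(String[] args) {
        GenericObject wrapper = new GenericObject();
        wrapper.set(new Text("hello"));       // wrap one of the declared types
        Writable unwrapped = wrapper.get();   // unwrap (after readFields() in real use)
        System.out.println(unwrapped);        // prints "hello"
      }
    }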
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new InputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + InputBuffer buffer = new InputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using InputStream methods ...
    + }
    + 
    + @see DataInputBuffer + @see DataOutput]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link IOException} or + null pointers. Must only be used for cleanup in exception handlers. + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key and + val. Returns true if such a pair exists and false when at + the end of the map]]> + + + + + + + + + + + + + + + + key or if it does not exist, at the first entry + after the named key. + +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof.]]> + + + + + + + + + key does not exist, return + the first entry that falls just before the key. Otherwise, + return the record that sorts just after. + @return - the key that was the closest match or null if eof.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new OutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + OutputBuffer buffer = new OutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using OutputStream methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    + @see DataOutputBuffer + @see InputBuffer]]> +
    +
    + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link Writer}, {@link Reader} and + {@link Sorter} classes for writing, reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
    1. + Writer : Uncompressed records. +
    2. +
    3. + RecordCompressWriter : Record-compressed files, only compress + values. +
    4. +
    5. + BlockCompressWriter : Block-compressed files, both keys & + values are collected in 'blocks' + separately and compressed. The size of + the 'block' is configurable. +
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

The recommended way is to use the static createWriter methods + provided by the SequenceFile to choose the preferred format.

    + +

    The {@link Reader} acts as the bridge and can read any of the above + SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
    • + version - 3 bytes of magic header SEQ, followed by 1 byte of actual + version number (e.g. SEQ4 or SEQ6) +
    • +
• + keyClassName - key class +
    • +
    • + valueClassName - value class +
    • +
    • + compression - A boolean which specifies if compression is turned on for + keys/values in this file. +
    • +
    • + blockCompression - A boolean which specifies if block-compression is + turned on for keys/values in this file. +
    • +
    • + compression codec - CompressionCodec class which is used for + compression of keys and/or values (if compression is + enabled). +
    • +
    • + metadata - {@link Metadata} for this file. +
    • +
    • + sync - A sync marker to denote end of the header. +
    • +
    + +
    Uncompressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Value
      • +
      +
    • +
    • + A sync-marker every few 100 bytes or so. +
    • +
    + +
    Record-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Compressed Value
      • +
      +
    • +
    • + A sync-marker every few 100 bytes or so. +
    • +
    + +
    Block-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record Block +
        +
      • Compressed key-lengths block-size
      • +
      • Compressed key-lengths block
      • +
      • Compressed keys block-size
      • +
      • Compressed keys block
      • +
      • Compressed value-lengths block-size
      • +
      • Compressed value-lengths block
      • +
      • Compressed values block-size
      • +
      • Compressed values block
      • +
      +
    • +
    • + A sync-marker every few 100 bytes or so. +
    • +
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
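To make the description above concrete, here is a hedged sketch that writes and re-reads a small SequenceFile using the classic createWriter/Reader signatures present in this 2.2.0 tree (later releases prefer the Option-based variants); the /tmp path is illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;

    public class SequenceFileSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        Path file = new Path("/tmp/example.seq");   // illustrative path

        // Write a few key/value pairs (uncompressed Writer by default).
        SequenceFile.Writer writer =
            SequenceFile.createWriter(fs, conf, file, IntWritable.class, Text.class);
        try {
          for (int i = 0; i < 3; i++) {
            writer.append(new IntWritable(i), new Text("value-" + i));
          }
        } finally {
          writer.close();
        }

        // Read the pairs back; the Reader detects the format from the header.
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
        try {
          IntWritable key = new IntWritable();
          Text value = new Text();
          while (reader.next(key, value)) {
            System.out.println(key + "\t" + value);
          }
        } finally {
          reader.close();
        }
      }
    }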
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key, skipping its + value. True if another entry exists, and false at end of file.]]> + + + + + + + + key and + val. Returns true if such a pair exists and false when at + end of file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The position passed must be a position returned by {@link + SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + position, use {@link SequenceFile.Reader#sync(long)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SegmentDescriptor + @param segments the list of SegmentDescriptors + @param tmpDir the directory to write temporary files into + @return RawKeyValueIterator + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For best performance, applications should make sure that the {@link + Writable#readFields(DataInput)} implementation of their keys is + very efficient. In particular, it should avoid allocating memory.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This always returns a synchronized position. In other words, + immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + the key may be earlier in the file than key last written when this + method was called (e.g., with block-compression, it may be the first key + in the block that was being written when this method was called).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key. Returns + true if such a key exists and false when at the end of the set.]]> + + + + + + + key. + Returns key, or null if no match exists.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instatiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. 
+ @return byte position of the first occurence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a UTF8 with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + Also includes utilities for efficiently reading and writing UTF-8. + + @deprecated replaced by Text]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

+ + @param in DataInput to deserialize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +

    +     public class MyWritable implements Writable {
    +       // Some data     
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 

    ]]> +
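The contract above can be exercised directly with any stock Writable; the following sketch round-trips a Text through plain java.io streams (the class and variable names are illustrative, not part of the API being documented):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import org.apache.hadoop.io.Text;

    public class WritableRoundTrip {
      public static void main(String[] args) throws Exception {
        Text original = new Text("hello");

        // Writable#write(DataOutput): serialize to a byte array.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Writable#readFields(DataInput): populate a reusable instance.
        Text copy = new Text();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy);   // prints "hello"
      }
    }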
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Example:

    +

    +     public class MyWritableComparable implements WritableComparable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable w) {
+         int thisValue = this.counter;
+         int thatValue = w.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +     }
    + 

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implemenation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. 
+ @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-lzo library is loaded & initialized; + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lzo compression/decompression pair. + http://www.oberhumer.com/opensource/lzo/]]> + + + + + + + + + + + + + + + + + + + + + + + lzo compression/decompression pair compatible with lzop. + http://www.lzop.org/]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FIXME: This array should be in a private or package private location, + since it could be modified by malicious code. +

    ]]> +
    + + + + This interface is public for historical purposes. You should have no need to + use it. +

    ]]> +
    + + + + + + + + + + Although BZip2 headers are marked with the magic "Bz" this + constructor expects the next byte in the stream to be the first one after + the magic. Thus callers have to skip the first two bytes. Otherwise this + constructor will throw an exception. +

    + + @throws IOException + if the stream content is malformed or an I/O error occurs. + @throws NullPointerException + if in == null]]> +
    +
    + + + + + + + + + + + + + + + The decompression requires large amounts of memory. Thus you should call the + {@link #close() close()} method as soon as possible, to force + CBZip2InputStream to release the allocated memory. See + {@link CBZip2OutputStream CBZip2OutputStream} for information about memory + usage. +

    + +

    + CBZip2InputStream reads bytes from the compressed source stream via + the single byte {@link java.io.InputStream#read() read()} method exclusively. + Thus you should consider to use a buffered source stream. +

    + +

    + Instances of this class are not threadsafe. +

    ]]> +
    +
    + + + + + + + + CBZip2OutputStream with a blocksize of 900k. + +

+ Attention: The caller is responsible for writing the two BZip2 magic + bytes "BZ" to the specified stream prior to calling this + constructor. +

+ + @param out + the destination stream. + + @throws IOException + if an I/O error occurs in the specified stream. + @throws NullPointerException + if out == null.]]> +
    +
    + + + + CBZip2OutputStream with specified blocksize. + +

+ Attention: The caller is responsible for writing the two BZip2 magic + bytes "BZ" to the specified stream prior to calling this + constructor. +

    + + + @param out + the destination stream. + @param blockSize + the blockSize as 100k units. + + @throws IOException + if an I/O error occurs in the specified stream. + @throws IllegalArgumentException + if (blockSize < 1) || (blockSize > 9). + @throws NullPointerException + if out == null. + + @see #MIN_BLOCKSIZE + @see #MAX_BLOCKSIZE]]> +
    +
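A short sketch of the calling convention both constructors insist on, i.e. the caller emits the "BZ" magic itself before constructing the stream; the output path is illustrative and the blockSize of 9 corresponds to 900k blocks:

    import java.io.FileOutputStream;
    import java.io.OutputStream;
    import org.apache.hadoop.io.compress.bzip2.CBZip2OutputStream;

    public class BZip2WriteSketch {
      public static void main(String[] args) throws Exception {
        OutputStream raw = new FileOutputStream("/tmp/example.bz2");  // illustrative path
        raw.write('B');                    // the stream does not write the magic itself,
        raw.write('Z');                    // so the caller must, as documented above
        CBZip2OutputStream bzOut = new CBZip2OutputStream(raw, 9);    // 9 * 100k block size
        bzOut.write("hello bzip2".getBytes("UTF-8"));
        bzOut.close();   // flushes the final block and releases the large internal buffers
      }
    }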
    + + + + + + + + + + + + + inputLength this method returns MAX_BLOCKSIZE + always. + + @param inputLength + The length of the data which will be compressed by + CBZip2OutputStream.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + == 1.]]> + + + + + == 9.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If you are ever unlucky/improbable enough to get a stack overflow whilst + sorting, increase the following constant and try again. In practice I + have never seen the stack go above 27 elems, so the following limit seems + very generous. +

    ]]> +
    +
    + + + The compression requires large amounts of memory. Thus you should call the + {@link #close() close()} method as soon as possible, to force + CBZip2OutputStream to release the allocated memory. +

    + +

    + You can shrink the amount of allocated memory and maybe raise the compression + speed by choosing a lower blocksize, which in turn may cause a lower + compression ratio. You can avoid unnecessary memory allocation by avoiding + using a blocksize which is bigger than the size of the input. +

    + +

    + You can compute the memory usage for compressing by the following formula: +

    + +
    + <code>400k + (9 * blocksize)</code>.
    + 
    + +

    + To get the memory required for decompression by {@link CBZip2InputStream + CBZip2InputStream} use +

    + +
    + <code>65k + (5 * blocksize)</code>.
    + 
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Memory usage by blocksize:

    Blocksize    Compression memory usage    Decompression memory usage
    100k         1300k                       565k
    200k         2200k                       1065k
    300k         3100k                       1565k
    400k         4000k                       2065k
    500k         4900k                       2565k
    600k         5800k                       3065k
    700k         6700k                       3565k
    800k         7600k                       4065k
    900k         8500k                       4565k
    + +

    + For decompression CBZip2InputStream allocates less memory if the + bzipped input is smaller than one block. +

    + +

    + Instances of this class are not threadsafe. +

    + +

    + TODO: Update to BZip2 1.0.1 +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + true if lzo compressors are loaded & initialized, + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if lzo decompressors are loaded & initialized, + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-zlib is loaded & initialized + and can be loaded for this job, else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Keep trying a limited number of times, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
    + + + + + + + Keep trying for a maximum time, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
+ + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime multiplied by the number of tries so far. +

    ]]> +
    +
+ + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime multiplied by a random + number in the range [0, 2^(number of retries)) +

    ]]> +
    +
    + + + + + + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + + + A retry policy for RemoteException + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + Try once, and fail by re-throwing the exception. + This corresponds to having no retry mechanism in place. +

    ]]> +
    +
    + + + + Try once, and fail silently for void methods, or by + re-throwing the exception for non-void methods. +

    ]]> +
    +
    + + + + Keep trying forever. +

    ]]> +
    +
    + + + A collection of useful implementations of {@link RetryPolicy}. +

    ]]> +
    +
    + + + + + + + + + + Determines whether the framework should retry a + method for the given exception, and the number + of retries that have been made for that operation + so far. +

    + @param e The exception that caused the method to fail. + @param retries The number of times the method has been retried. + @return true if the method should be retried, + false if the method should not be retried + but shouldn't fail with an exception (only for void methods). + @throws Exception The re-thrown exception e indicating + that the method failed and should not be retried further.]]> +
    +
    + + + Specifies a policy for retrying method failures. + Implementations of this interface should be immutable. +

    ]]> +
    +
    + + + + + + + + + + + + Create a proxy for an interface of an implementation class + using the same retry policy for each method in the interface. +

+ @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param retryPolicy the policy for retrying method call failures + @return the retry proxy]]> +
    +
+ + + + + + + Create a proxy for an interface of an implementation class + using a set of retry policies specified by method name. + If no retry policy is defined for a method then a default of + {@link RetryPolicies#TRY_ONCE_THEN_FAIL} is used. +

    + @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param methodNameToPolicyMap a map of method names to retry policies + @return the retry proxy]]> +
    +
    + + + A factory for creating retry proxies. +

    ]]> +
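A hedged end-to-end sketch of the proxy/policy pairing described above, using RetryPolicies.retryUpToMaximumCountWithFixedSleep and RetryProxy.create; the Pinger interface and its deliberately flaky implementation are purely illustrative:

    import java.util.concurrent.TimeUnit;
    import org.apache.hadoop.io.retry.RetryPolicies;
    import org.apache.hadoop.io.retry.RetryPolicy;
    import org.apache.hadoop.io.retry.RetryProxy;

    // Hypothetical service interface, for illustration only.
    interface Pinger {
      String ping() throws Exception;
    }

    public class RetryProxySketch {
      public static void main(String[] args) throws Exception {
        Pinger flaky = new Pinger() {
          private int calls = 0;
          public String ping() throws Exception {
            if (++calls < 3) {
              throw new Exception("transient failure " + calls);
            }
            return "pong";
          }
        };

        // Retry each failed call up to 5 times, sleeping 100 ms between attempts.
        RetryPolicy policy =
            RetryPolicies.retryUpToMaximumCountWithFixedSleep(5, 100, TimeUnit.MILLISECONDS);
        Pinger reliable = (Pinger) RetryProxy.create(Pinger.class, flaky, policy);

        System.out.println(reliable.ping());   // succeeds on the third underlying call
      }
    }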
    +
    + +
    + + + + + + + + Prepare the deserializer for reading.

    ]]> +
    +
    + + + + + + Deserialize the next object from the underlying input stream. + If the object t is non-null then this deserializer + may set its internal state to the next object read from the input + stream. Otherwise, if the object t is null a new + deserialized object will be created. +

    + @return the deserialized object]]> +
    +
    + + + + Close the underlying input stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for deserializing objects of type from an + {@link InputStream}. +

    + +

    + Deserializers are stateful, but must not buffer the input since + other producers may read from the input between calls to + {@link #deserialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link Deserializer} to deserialize + the objects to be compared so that the standard {@link Comparator} can + be used to compare them. +

    +

    + One may optimize compare-intensive operations by using a custom + implementation of {@link RawComparator} that operates directly + on byte representations. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + Encapsulates a {@link Serializer}/{@link Deserializer} pair. +

    + @param ]]> +
    +
    + + + + + + + Serializations are found by reading the io.serializations + property from conf, which is a comma-delimited list of + classnames. +

    ]]> +
    +
    + + + + + + + + + + + + A factory for {@link Serialization}s. +

    ]]> +
    +
    + + + + + + + + Prepare the serializer for writing.

    ]]> +
    +
    + + + + + Serialize t to the underlying output stream.

    ]]> +
    +
    + + + + Close the underlying output stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for serializing objects of type to an + {@link OutputStream}. +

    + +

    + Serializers are stateful, but must not buffer the output since + other producers may write to the output between calls to + {@link #serialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + param, to the IPC server running at + address, returning the value. Throws exceptions if there are + network problems or if the remote code threw an exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unwraps any IOException. + + @param lookupTypes the desired exception class. + @return IOException, which is either the lookupClass exception or this.]]> + + + + + This unwraps any Throwable that has a constructor taking + a String as a parameter. + Otherwise it returns this. + + @return Throwable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + protocol is a Java interface. All parameters and return types must + be one of: + +
    • a primitive type, boolean, byte, + char, short, int, long, + float, double, or void; or
    • + +
    • a {@link String}; or
    • + +
    • a {@link Writable}; or
    • + +
    • an array of the above types
    + + All methods in the protocol should throw only IOException. No field data of + the protocol instance is transmitted.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + handlerCount determines + the number of handler threads that will be used to process calls.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #rpcQueueTime}.inc(time)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        rpc.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker, + as {@link JobTracker.State} + + @return the current state of the JobTracker.]]> + + + + + + + + + + + + ClusterStatus provides clients with information such as: +

      +
    1. + Size of the cluster. +
    2. +
    3. + Task capacity of the cluster. +
    4. +
    5. + The number of currently running map & reduce tasks. +
    6. +
    7. + State of the JobTracker. +
    8. +

    + +

    Clients can query for the latest ClusterStatus, via + {@link JobClient#getClusterStatus()}.

    + + @see JobClient]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter can be of + any {@link Enum} type.

    + +

    Counters are bunched into {@link Group}s, each comprising of + counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Group of counters, comprising of counters from a particular + counter {@link Enum} class. + +

Group handles localization of the class name and the + counter names.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param fs the file system that the file is on + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobConf, int)}. + Subclasses of FileInputFormat can also override the + {@link #isSplitable(FileSystem, Path)} method to ensure input-files are + not split-up and are processed as a whole by {@link Mapper}s.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

Note: The following is valid only if the {@link OutputCommitter} + is {@link FileOutputCommitter}. If OutputCommitter is not + a FileOutputCommitter, the task's temporary output + directory is the same as {@link #getOutputPath(JobConf)} i.e. + ${mapred.output.dir}

    + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapred.output.dir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapred.output.dir}/_temporary/_${taskid} (only) + are promoted to ${mapred.output.dir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

The application-writer can take advantage of this by creating any + side-files required in ${mapred.work.output.dir} during execution + of their reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the + framework will move them out similarly - thus they don't have to pick + unique paths per task-attempt.

    + +

Note: the value of ${mapred.work.output.dir} during + execution of a particular task-attempt is actually + ${mapred.output.dir}/_temporary/_${taskid}, and this value is + set by the map-reduce framework. So, just create any side-files in the + path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce + task to take advantage of this feature.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
    +
    + + + + + + + + + + + + + The generated name can be used to create custom files from within the + different tasks for the job, the names for different tasks will not collide + with each other.

    + +

The given name is postfixed with the task type, 'm' for maps, 'r' for + reduces, and the task partition number. For example, given the name 'test' + running on the first map of the job, the generated name will be + 'test-m-00000'.

+ + @param conf the configuration for the job. + @param name the name to make unique. + @return a unique name across all tasks of the job.]]> +
    +
    + + + + + The path can be used to create custom files from within the map and + reduce tasks. The path name will be unique for each task. The path parent + will be the job output directory.

+ +

    This method uses the {@link #getUniqueName} method to make the file name + unique for the task.

+ + @param conf the configuration for the job. + @param name the name for the file. + @return a unique path across all tasks of the job.]]> +
    +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

    Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For e.g. a split could + be <input-file-path, start, offset> tuple. + + @param job job configuration. + @param numSplits the desired number of splits, a hint. + @return an array of {@link InputSplit}s for the job.]]> + + + + + + + + + It is the responsibility of the RecordReader to respect + record boundaries while processing the logical split to present a + record-oriented view to the individual task.

    + + @param split the {@link InputSplit} + @param job the job that this split belongs to + @return a {@link RecordReader}]]> +
    +
    + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
    1. + Validate the input-specification of the job. +
    2. + Split-up the input file(s) into logical {@link InputSplit}s, each of + which is then assigned to an individual {@link Mapper}. +
    3. +
    4. + Provide the {@link RecordReader} implementation to be used to glean + input records from the logical InputSplit for processing by + the {@link Mapper}. +
    5. +
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

Clearly, logical splits based on input-size are insufficient for many + applications since record boundaries are to be respected. In such cases, the + application has to also implement a {@link RecordReader} on whom lies the + responsibility to respect record-boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see JobClient + @see FileInputFormat]]> + + + + + + + + + + InputSplit. + + @return the number of bytes in the input split. + @throws IOException]]> + + + + + + InputSplit is + located as an array of Strings. + @throws IOException]]> + + + + InputSplit represents the data to be processed by an + individual {@link Mapper}. + +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + jobid doesn't correspond to any known job. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient is the primary interface for the user-job to interact + with the {@link JobTracker}. + + JobClient provides facilities to submit jobs, track their + progress, access component-tasks' reports/logs, get the Map-Reduce cluster + status information etc. + +

    The job submission process involves: +

      +
    1. + Checking the input and output specifications of the job. +
    2. +
    3. + Computing the {@link InputSplit}s for the job. +
    4. +
    5. + Setup the requisite accounting information for the {@link DistributedCache} + of the job, if necessary. +
    6. +
    7. + Copying the job's jar and configuration to the map-reduce system directory + on the distributed file-system. +
    8. +
9. + Submitting the job to the JobTracker and optionally monitoring + its status. +
    10. +

    + + Normally the user creates the application, describes various facets of the + job via {@link JobConf} and then uses the JobClient to submit + the job and monitor its progress. + +

    Here is an example on how to use JobClient:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     job.setInputPath(new Path("in"));
    +     job.setOutputPath(new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +
    +     // Submit the job, then poll for progress until the job is complete
    +     JobClient.runJob(job);
    + 

    + +

    Job Control

    + +

    At times clients would chain map-reduce jobs to accomplish complex tasks + which cannot be done via a single map-reduce job. This is fairly easy since + the output of the job, typically, goes to distributed file-system and that + can be used as the input for the next job.

    + +

    However, this also means that the onus on ensuring jobs are complete + (success/failure) lies squarely on the clients. In such situations the + various job-control options are: +

      +
    1. + {@link #runJob(JobConf)} : submits the job and returns only after + the job has completed. +
    2. +
    3. + {@link #submitJob(JobConf)} : only submits the job, then poll the + returned handle to the {@link RunningJob} to query status and make + scheduling decisions. +
    4. +
    5. + {@link JobConf#setJobEndNotificationURI(String)} : setup a notification + on job-completion, thus avoiding polling. +
    6. +

    + + @see JobConf + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
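To complement the runJob example above, the second job-control option (submit, then poll) looks roughly like this; a hedged sketch that assumes the JobClient(JobConf), submitJob and RunningJob methods documented in this file and a JobConf configured as in the earlier snippet:

    import java.io.IOException;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.RunningJob;

    public class SubmitAndPoll {
      // Submits 'job' without blocking, then polls until completion (option 2 above).
      public static void run(JobConf job) throws IOException, InterruptedException {
        JobClient client = new JobClient(job);
        RunningJob running = client.submitJob(job);
        while (!running.isComplete()) {
          Thread.sleep(5000);   // poll every five seconds
          System.out.println("map " + running.mapProgress()
              + " reduce " + running.reduceProgress());
        }
        if (!running.isSuccessful()) {
          throw new IOException("Job failed: " + running.getID());
        }
      }
    }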
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If the parameter {@code loadDefaults} is false, the new instance + will not load resources from the default files. + + @param loadDefaults specifies whether to load from the default files]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if framework should keep the intermediate files + for failed tasks, false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the outputs of the maps are to be compressed, + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This comparator should be provided if the equivalence rules for keys + for sorting the intermediates are different from those for grouping keys + before each call to + {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.

    + +

    For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed + in a single call to the reduce function if K1 and K2 compare as equal.

    + +

    Since {@link #setOutputKeyComparatorClass(Class)} can be used to control + how keys are sorted, this can be used in conjunction to simulate + secondary sort on values.

    + +

    Note: This is not a guarantee of the reduce sort being + stable in any sense. (In any case, with the order of available + map-outputs to the reduce being non-deterministic, it wouldn't make + that much sense.)

    + + @param theClass the comparator class to be used for grouping keys. + It should implement RawComparator. + @see #setOutputKeyComparatorClass(Class)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. Typically the combiner is same as the + the {@link Reducer} for the job i.e. {@link #getReducerClass()}. + + @return the user-defined combiner class used to combine map-outputs.]]> + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. + +

    The combiner is a task-level aggregation operation which, in some cases, + helps to cut down the amount of data transferred from the {@link Mapper} to + the {@link Reducer}, leading to better performance.

    + +

    Typically the combiner is same as the Reducer for the + job i.e. {@link #setReducerClass(Class)}.

    + + @param theClass the user-defined combiner class used to combine + map-outputs.]]> +
    +
    + + + true. + + @return true if speculative execution be used for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on, else false.]]> + + + + + true. + + @return true if speculative execution be + used for this job for map tasks, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for map tasks, + else false.]]> + + + + + true. + + @return true if speculative execution be used + for reduce tasks for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for reduce tasks, + else false.]]> + + + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + Note: This is only a hint to the framework. The actual + number of spawned map tasks depends on the number of {@link InputSplit}s + generated by the job's {@link InputFormat#getSplits(JobConf, int)}. + + A custom {@link InputFormat} is typically used to accurately control + the number of map tasks for the job.

    + +

    How many maps?

    + +

    The number of maps is usually driven by the total size of the inputs + i.e. total number of blocks of the input files.

    + +

The right level of parallelism for maps seems to be around 10-100 maps + per node, although it has been set up to 300 or so for very CPU-light map + tasks. Task setup takes a while, so it is best if the maps take at least a + minute to execute.

    + +

    The default behavior of file-based {@link InputFormat}s is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of input files. However, the {@link FileSystem} blocksize of the + input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

    Thus, if you expect 10TB of input data and have a blocksize of 128MB, + you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is + used to set it even higher.

    + + @param n the number of map tasks for this job. + @see InputFormat#getSplits(JobConf, int) + @see FileInputFormat + @see FileSystem#getDefaultBlockSize() + @see FileStatus#getBlockSize()]]> +
    +
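As a rough check of the 82,000 figure above: 10 TB / 128 MB = (10 x 1024 x 1024) MB / 128 MB = 81,920 splits, i.e. about 82,000 map tasks, one per 128 MB block.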
    + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + How many reduces? + +

    The right number of reduces seems to be 0.95 or + 1.75 multiplied by (<no. of nodes> * + + mapred.tasktracker.reduce.tasks.maximum). +

    + +

With 0.95 all of the reduces can launch immediately and + start transferring map outputs as the maps finish. With 1.75 + the faster nodes will finish their first round of reduces and launch a + second wave of reduces, doing a much better job of load balancing.

    + +

Increasing the number of reduces increases the framework overhead, but + improves load balancing and lowers the cost of failures.

    + +

    The scaling factors above are slightly less than whole numbers to + reserve a few reduce slots in the framework for speculative-tasks, failures + etc.

    + +

    Reducer NONE

    + +

    It is legal to set the number of reduce-tasks to zero.

    + +

In this case the output of the map-tasks goes directly to the distributed + file-system, to the path set by + {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the + framework doesn't sort the map-outputs before writing them out to HDFS.

    + + @param n the number of reduce tasks for this job.]]> +
    +
    + + + mapred.map.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per map task.]]> + + + + + + + + + + + mapred.reduce.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per reduce task.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + noFailures, the + tasktracker is blacklisted for this job. + + @param noFailures maximum no. of failures of a given job per tasktracker.]]> + + + + + blacklisted for this job. + + @return the maximum no. of failures of a given job per tasktracker.]]> + + + + + failed. + + Defaults to zero, i.e. any failed map-task results in + the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + failed. + + Defaults to zero, i.e. any failed reduce-task results + in the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The debug script can aid debugging of failed map tasks. The script is + given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the map failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setMapDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param mDbgScript the script name]]> +
    +
    + + + + + + + + + The debug script can aid debugging of failed reduce tasks. The script + is given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

The debug command, run on the node where the reduce failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

The script file is distributed through {@link DistributedCache} + APIs. The script file needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setReduceDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param rDbgScript the script name]]> +
    +
    + + + + + + + + null if it hasn't + been set. + @see #setJobEndNotificationURI(String)]]> + + + + + + The uri can contain 2 special parameters: $jobId and + $jobStatus. Those, if present, are replaced by the job's + identifier and completion-status respectively.

    + +

    This is typically used by application-writers to implement chaining of + Map-Reduce jobs in an asynchronous manner.

    + + @param uri the job end notification uri + @see JobStatus + @see Job Completion and Chaining]]> +
    +
    + + + + When a job starts, a shared directory is created at location + + ${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ . + This directory is exposed to the users through + job.local.dir . + So, the tasks can use this space + as scratch space and share files among them.

    + This value is available as System property also. + + @return The localized job specific shared directory]]> +
    +
    + + + + + + + + + + + + + + + + + + JobConf is the primary interface for a user to describe a + map-reduce job to the Hadoop framework for execution. The framework tries to + faithfully execute the job as-is described by JobConf, however: +
      +
    1. + Some configuration parameters might have been marked as + + final by administrators and hence cannot be altered. +
    2. +
3. + While some job parameters are straight-forward to set + (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly with the + rest of the framework and/or job-configuration and are relatively more + complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}). +
    4. +

    + +

    JobConf typically specifies the {@link Mapper}, combiner + (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and + {@link OutputFormat} implementations to be used etc. + +

Optionally JobConf is used to specify other advanced facets + of the job such as the Comparators to be used, files to be put in + the {@link DistributedCache}, whether or not intermediate and/or job outputs + are to be compressed (and how), and debuggability via user-provided scripts + ({@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}) + for post-processing task logs, the task's stdout, stderr, syslog, + and so on.

    + +

    Here is an example on how to configure a job via JobConf:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     FileInputFormat.setInputPaths(job, new Path("in"));
    +     FileOutputFormat.setOutputPath(job, new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setCombinerClass(MyJob.MyReducer.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +     
    +     job.setInputFormat(SequenceFileInputFormat.class);
    +     job.setOutputFormat(SequenceFileOutputFormat.class);
    + 

    + + @see JobClient + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + any job + run on the jobtracker started at 200707121733, we would use : +
     
    + JobID.getTaskIDsPattern("200707121733", null);
    + 
    + which will return : +
     "job_200707121733_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @return a regex pattern matching JobIDs]]> +
    +
    + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID + @see JobTracker#getNewJobId() + @see JobTracker#getStartTime()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "N/A" + + @return Scheduling information associated to particular Job Queue]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -archives + -files inputjar args]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + zero. + + @param conf configuration for the JobTracker. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Output pairs need not be of the same types as input pairs. A given + input pair may map to zero or many output pairs. Output pairs are + collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the input key. + @param value the input value. + @param output collects mapped keys and values. + @param reporter facility to report progress.]]> +
+ + + Maps are the individual tasks which transform input records into + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link JobConf} for the + job via the {@link JobConfigurable#configure(JobConf)} and initialize + themselves. Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    The framework then calls + {@link #map(Object, Object, OutputCollector, Reporter)} + for each key/value pair in the InputSplit for that task.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the grouping by specifying + a Comparator via + {@link JobConf#setOutputKeyComparatorClass(Class)}.

    + +

    The grouped Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    The intermediate, grouped outputs are always stored in + {@link SequenceFile}s. Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the JobConf.

    + +

    If the job has + zero + reduces then the output of the Mapper is directly written + to the {@link FileSystem} without grouping by keys.

    + +

    Example:

    +

    +     public class MyMapper<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Mapper<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +       
    +       private String mapTaskId;
    +       private String inputFile;
    +       private int noRecords = 0;
    +       
    +       public void configure(JobConf job) {
    +         mapTaskId = job.get("mapred.task.id");
    +         inputFile = job.get("mapred.input.file");
    +       }
    +       
    +       public void map(K key, V val,
    +                       OutputCollector<K, V> output, Reporter reporter)
    +       throws IOException {
    +         // Process the <key, value> pair (assume this takes a while)
    +         // ...
    +         // ...
    +         
    +         // Let the framework know that we are alive, and kicking!
    +         // reporter.progress();
    +         
    +         // Process some more
    +         // ...
    +         // ...
    +         
    +         // Increment the no. of <key, value> pairs processed
    +         ++noRecords;
    +
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +        
    +         // Every 100 records update application-level status
    +         if ((noRecords%100) == 0) {
    +           reporter.setStatus(mapTaskId + " processed " + noRecords + 
    +                              " from input-file: " + inputFile); 
    +         }
    +         
    +         // Output the result
    +         output.collect(key, val);
    +       }
    +     }
    + 

    + +

    Applications may write a custom {@link MapRunnable} to exert greater + control on map processing e.g. multi-threaded Mappers etc.

    + + @see JobConf + @see InputFormat + @see Partitioner + @see Reducer + @see MapReduceBase + @see MapRunnable + @see SequenceFile]]> +
    +
Provides default no-op implementations for a few methods; most non-trivial + applications need to override some of them.

    ]]> +
    +
    + + + + + + + + + + + <key, value> pairs. + +

    Mapping of input records to output records is complete when this method + returns.

+ + @param input the {@link RecordReader} to read the input records. + @param output the {@link OutputCollector} to collect the output records. + @param reporter {@link Reporter} to report progress, status-updates etc. + @throws IOException]]> +
    +
    + + Custom implementations of MapRunnable can exert greater + control on map processing e.g. multi-threaded, asynchronous mappers etc.

    + + @see Mapper]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nearly + equal content length.
    + Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} + to construct RecordReader's for MultiFileSplit's. + @see MultiFileSplit]]> +
    +
    + + + + + + + + + + + + + + + + + th Path]]> + + + + + + + + + + + th Path]]> + + + + + + + + + + + + + + + + + + + + + + + MultiFileSplit can be used to implement {@link RecordReader}'s, with + reading one record per file. + @see FileSplit + @see MultiFileInputFormat]]> + + + + + + + + + + + + + + + <key, value> pairs output by {@link Mapper}s + and {@link Reducer}s. + +

    OutputCollector is the generalization of the facility + provided by the Map-Reduce framework to collect data output by either the + Mapper or the Reducer i.e. intermediate outputs + or the output of the job.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OutputCommitter describes the commit of task output for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputCommitter of + the job to:

    +

      +
    1. + Setup the job during initialization. For example, create the temporary + output directory for the job during the initialization of the job. +
    2. +
    3. + Cleanup the job after the job completion. For example, remove the + temporary output directory after the job completion. +
    4. +
    5. + Setup the task temporary output. +
    6. +
    7. + Check whether a task needs a commit. This is to avoid the commit + procedure if a task does not need commit. +
    8. +
    9. + Commit of the task output. +
    10. +
    11. + Discard the task commit. +
    12. +
    + + @see FileOutputCommitter + @see JobContext + @see TaskAttemptContext]]> +
    +
+ + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when the + job is submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + @param ignored + @param job job configuration. + @throws IOException when output should not be attempted]]> +
    +
    + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
    1. + Validate the output-specification of the job. For e.g. check that the + output directory doesn't already exist. +
    2. + Provide the {@link RecordWriter} implementation to be used to write out + the output files of the job. Output files are stored in a + {@link FileSystem}. +
    3. +
    + + @see RecordWriter + @see JobConf]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + Typically a hash function on a all or a subset of the key.

+ + @param key the key to be partitioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + + @see Reducer]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.0 to 1.0. + @throws IOException]]> + + + + RecordReader reads <key, value> pairs from an + {@link InputSplit}. + +

    RecordReader, typically, converts the byte-oriented view of + the input, provided by the InputSplit, and presents a + record-oriented view for the {@link Mapper} & {@link Reducer} tasks for + processing. It thus assumes the responsibility of processing record + boundaries and presenting the tasks with keys and values.

    + + @see InputSplit + @see InputFormat]]> +
    +
    + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param reporter facility to report progress. + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + Reduces values for a given key. + +

    The framework calls this method for each + <key, (list of values)> pair in the grouped inputs. + Output values must be of the same type as input values. Input keys must + not be altered. The framework will reuse the key and value objects + that are passed into the reduce, therefore the application should clone + the objects they want to keep a copy of. In many cases, all values are + combined into zero or one value. +

    + +

    Output pairs are collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the key. + @param values the list of values to reduce. + @param output to collect keys and combined values. + @param reporter facility to report progress.]]> +
    + + + The number of Reducers for the job is set by the user via + {@link JobConf#setNumReduceTasks(int)}. Reducer implementations + can access the {@link JobConf} for the job via the + {@link JobConfigurable#configure(JobConf)} method and initialize themselves. + Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    Reducer has 3 primary phases:

    +
      +
    1. + +

      Shuffle

      + +

Reducer is input the grouped output of a {@link Mapper}. + In this phase the framework, for each Reducer, fetches the + relevant partition of the output of all the Mappers, via HTTP. +

      +
    2. + +
    3. +

      Sort

      + +

      The framework groups Reducer inputs by keys + (since different Mappers may have output the same key) in this + stage.

      + +

      The shuffle and sort phases occur simultaneously i.e. while outputs are + being fetched they are merged.

      + +
      SecondarySort
      + +

If equivalence rules for keys while grouping the intermediates are + different from those for grouping keys before reduction, then one may + specify a Comparator via + {@link JobConf#setOutputValueGroupingComparator(Class)}. Since + {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to + control how intermediate keys are grouped, these can be used in conjunction + to simulate secondary sort on values.

      + + + For example, say that you want to find duplicate web pages and tag them + all with the url of the "best" known example. You would set up the job + like: +
        +
      • Map Input Key: url
      • +
      • Map Input Value: document
      • +
      • Map Output Key: document checksum, url pagerank
      • +
      • Map Output Value: url
      • +
      • Partitioner: by checksum
      • +
      • OutputKeyComparator: by checksum and then decreasing pagerank
      • +
      • OutputValueGroupingComparator: by checksum
      • +
      +
    4. + +
    5. +

      Reduce

      + +

      In this phase the + {@link #reduce(Object, Iterator, OutputCollector, Reporter)} + method is called for each <key, (list of values)> pair in + the grouped inputs.

      +

      The output of the reduce task is typically written to the + {@link FileSystem} via + {@link OutputCollector#collect(Object, Object)}.

      +
    6. +
    + +

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

    +     public class MyReducer<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Reducer<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +        
    +       private String reduceTaskId;
    +       private int noKeys = 0;
    +       
    +       public void configure(JobConf job) {
    +         reduceTaskId = job.get("mapred.task.id");
    +       }
    +       
    +       public void reduce(K key, Iterator<V> values,
    +                          OutputCollector<K, V> output, 
    +                          Reporter reporter)
    +       throws IOException {
    +       
    +         // Process
    +         int noValues = 0;
    +         while (values.hasNext()) {
    +           V value = values.next();
    +           
    +           // Increment the no. of values for this key
    +           ++noValues;
    +           
    +           // Process the <key, value> pair (assume this takes a while)
    +           // ...
    +           // ...
    +           
    +           // Let the framework know that we are alive, and kicking!
    +           if ((noValues%10) == 0) {
    +             reporter.progress();
    +           }
    +         
    +           // Process some more
    +           // ...
    +           // ...
    +           
    +           // Output the <key, value> 
    +           output.collect(key, value);
    +         }
    +         
    +         // Increment the no. of <key, list of values> pairs processed
    +         ++noKeys;
    +         
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +         
    +         // Every 100 keys update application-level status
    +         if ((noKeys%100) == 0) {
    +           reporter.setStatus(reduceTaskId + " processed " + noKeys);
    +         }
    +       }
    +     }
    + 

    + + @see Mapper + @see Partitioner + @see Reporter + @see MapReduceBase]]> +
    +
    + + + + + + + + + + + + + + + Counter of the given group/name.]]> + + + + + + + Enum. + @param amount A non-negative amount by which the counter is to + be incremented.]]> + + + + + + + + + + + + + + InputSplit that the map is reading from. + @throws UnsupportedOperationException if called outside a mapper]]> + + + + + + + + + {@link Mapper} and {@link Reducer} can use the Reporter + provided to report progress or just indicate that they are alive. In + scenarios where the application takes an insignificant amount of time to + process individual key/value pairs, this is crucial since the framework + might assume that the task has timed-out and kill that task. + +

    Applications can also update {@link Counters} via the provided + Reporter .

    + + @see Progressable + @see Counters]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + progress of the job's cleanup-tasks, as a float between 0.0 + and 1.0. When all cleanup tasks have completed, the function returns 1.0. + + @return the progress of the job's cleanup-tasks. + @throws IOException]]> + + + + + + progress of the job's setup-tasks, as a float between 0.0 + and 1.0. When all setup tasks have completed, the function returns 1.0. + + @return the progress of the job's setup-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RunningJob is the user-interface to query for details on a + running Map-Reduce job. + +

    Clients can get hold of RunningJob via the {@link JobClient} + and then query the running-job for details such as name, configuration, + progress etc.

    + + @see JobClient]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This allows the user to specify the key class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + This allows the user to specify the value class to be different + from the actual class ({@link BytesWritable}) used for writing

+ + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output value class.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f. The filtering criteria is + MD5(key) % f == 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + f using + the criteria record# % f == 0. + For example, if the frequency is 10, one out of 10 records is returned.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}. + false otherwise.]]> + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}. + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop provides an optional mode of execution in which the bad records + are detected and skipped in further attempts. + +

This feature can be used when map/reduce tasks crash deterministically on + certain input. This happens due to bugs in the map/reduce function. The usual + course would be to fix these bugs. But sometimes this is not possible; + perhaps the bug is in third-party libraries for which the source code is + not available. Due to this, the task never reaches completion even with + multiple attempts, and the complete data for that task is lost.

    + +

With this feature, only a small portion of data surrounding the + bad record is lost, which may be acceptable for some user applications; + see {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}.

    + +

The skipping mode gets kicked off after a certain number of failures; + see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}.

    + +

In the skipping mode, the map/reduce task maintains the record range which + is being processed at all times. Before giving the input to the + map/reduce function, it sends this record range to the TaskTracker. + If the task crashes, the TaskTracker knows which one was the last reported + range. On further attempts that range gets skipped.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all task attempt IDs + of any jobtracker, in any job, of the first + map task, we would use : +
     
    + TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
    + 
    + which will return : +
     "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @param attemptId the task attempt number, or null + @return a regex pattern matching TaskAttemptIDs]]> +
    +
    + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the first map task + of any jobtracker, of any job, we would use : +

     
    + TaskID.getTaskIDsPattern(null, null, true, 1);
    + 
    + which will return : +
     "task_[^_]*_[0-9]*_m_000001*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @return a regex pattern matching TaskIDs]]> +
    + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.log.dir.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the Job was added.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([,]*) + func ::= tbl(,"") + class ::= @see java.lang.Class#forName(java.lang.String) + path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) + } + Reads expression from the mapred.join.expr property and + user-supplied join types from mapred.join.define.<ident> + types. Paths supplied to tbl are given as input paths to the + InputFormat class listed. + @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,

    ) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + mapred.join.define.<ident> to a classname. In the expression + mapred.join.expr, the identifier will be assumed to be a + ComposableRecordReader. + mapred.join.keycomparator can be a classname used to compare keys + in the join. + @see JoinRecordReader + @see MultiFilterRecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + capacity children to position + id in the parent reader. + The id of a root CompositeRecordReader is -1 by convention, but relying + on this is not recommended.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + override(S1,S2,S3) will prefer values + from S3 over S2, and values from S2 over S1 for all keys + emitted from all sources.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [,,...,]]]> + + + + + + + out. + TupleWritable format: + {@code + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

+ For the added Mapper the configuration given for it, + mapperConf, has precedence over the job's JobConf. This + precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain +

    + + @param job job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overriden super.configure(...) should be + invoked at the beginning of the overwriter method.]]> + + + + + + + + + + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overriden super.close() should be + invoked at the end of the overwriter method.]]> + + + + + The Mapper classes are invoked in a chained (or piped) fashion, the output of + the first becomes the input of the second, and so on until the last Mapper, + the output of the last Mapper will be written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed in a chain. This enables having + reusable specialized Mappers that can be combined to perform composite + operations within a single task. +

+ Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reducer in the chain use matching output and input key and + value classes, as no conversion is done by the chaining code. +

+ Using the ChainMapper and the ChainReducer classes it is possible to compose + Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An + immediate benefit of this pattern is a dramatic reduction in disk IO. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain. +

    + ChainMapper usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    + 

    + JobConf mapAConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, + Text.class, Text.class, true, mapAConf); +

    + JobConf mapBConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, mapBConf); +

    + JobConf reduceConf = new JobConf(false); + ... + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, + Text.class, Text.class, true, reduceConf); +

    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, null); +

    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, + LongWritable.class, LongWritable.class, true, null); +

    + FileInputFormat.setInputPaths(conf, inDir); + FileOutputFormat.setOutputPath(conf, outDir); + ... +

    + JobClient jc = new JobClient(conf); + RunningJob job = jc.submitJob(conf); + ... +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Reducer leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Reducer does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

+ For the added Reducer the configuration given for it, + reducerConf, has precedence over the job's JobConf. This + precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. + + @param job job's JobConf to add the Reducer class. + @param klass the Reducer class to add. + @param inputKeyClass reducer input key class. + @param inputValueClass reducer input value class. + @param outputKeyClass reducer output key class. + @param outputValueClass reducer output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param reducerConf a JobConf with the configuration for the Reducer + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

+ For the added Mapper the configuration given for it, + mapperConf, has precedence over the job's JobConf. This + precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain + . + + @param job chain job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overriden super.configure(...) should be + invoked at the beginning of the overwriter method.]]> + + + + + + + + + + reduce(...) method of the Reducer with the + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overriden super.close() should be + invoked at the end of the overwriter method.]]> + + + + + For each record output by the Reducer, the Mapper classes are invoked in a + chained (or piped) fashion, the output of the first becomes the input of the + second, and so on until the last Mapper, the output of the last Mapper will + be written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed after the Reducer or in a chain. + This enables having reusable specialized Mappers that can be combined to + perform composite operations within a single task. +

+ Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reducer in the chain use matching output and input key and + value classes, as no conversion is done by the chaining code. +

+ Using the ChainMapper and the ChainReducer classes it is possible to compose + Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An + immediate benefit of this pattern is a dramatic reduction in disk IO. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. +

    + ChainReducer usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    + 

    + JobConf mapAConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, + Text.class, Text.class, true, mapAConf); +

    + JobConf mapBConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, mapBConf); +

    + JobConf reduceConf = new JobConf(false); + ... + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, + Text.class, Text.class, true, reduceConf); +

    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, null); +

    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, + LongWritable.class, LongWritable.class, true, null); +

    + FileInputFormat.setInputPaths(conf, inDir); + FileOutputFormat.setOutputPath(conf, outDir); + ... +

    + JobClient jc = new JobClient(conf); + RunningJob job = jc.submitJob(conf); + ... +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + @param freq The frequency with which records will be emitted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + This will read every split at the client, which is very expensive. + @param freq Probability with which a key will be chosen. + @param numSamples Total number of samples to obtain from all selected + splits.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + Takes the first numSamples / numSplits records from each split. + @param numSamples Total number of samples to obtain from all selected + splits.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the name output is multi, false + if it is single. If the name output is not defined it returns + false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @param conf job conf to add the named output + @param namedOutput named output name, it has to be a word, letters + and numbers only, cannot be the word 'part' as + that is reserved for the + default output. + @param outputFormatClass OutputFormat class. + @param keyClass key class + @param valueClass value class]]> + + + + + + + + + + + + @param conf job conf to add the named output + @param namedOutput named output name, it has to be a word, letters + and numbers only, cannot be the word 'part' as + that is reserved for the + default output. + @param outputFormatClass OutputFormat class. + @param keyClass key class + @param valueClass value class]]> + + + + + + + + By default these counters are disabled. +

+ MultipleOutputs supports counters, which are disabled by default. + The counters group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_' and the multiname. + + @param conf job conf in which to enable or disable the counters. + @param enabled indicates if the counters will be enabled or not.]]> +
    +
    + + + + + By default these counters are disabled. +

+ MultipleOutputs supports counters, which are disabled by default. + The counters group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_' and the multiname. + + + @param conf job conf to check whether the counters are enabled. + @return TRUE if the counters are enabled, FALSE if they are disabled.]]> +
    +
    + + + + + + + + + + + + + @param namedOutput the named output name + @param reporter the reporter + @return the output collector for the given named output + @throws IOException thrown if output collector could not be created]]> + + + + + + + + + + + @param namedOutput the named output name + @param multiName the multi name part + @param reporter the reporter + @return the output collector for the given named output + @throws IOException thrown if output collector could not be created]]> + + + + + + + If overriden subclasses must invoke super.close() at the + end of their close() + + @throws java.io.IOException thrown if any of the MultipleOutput files + could not be closed properly.]]> + + + + OutputCollector passed to + the map() and reduce() methods of the + Mapper and Reducer implementations. +

    + Each additional output, or named output, may be configured with its own + OutputFormat, with its own key class and with its own value + class. +

+ A named output can be a single file or a multi file. The latter is referred to as + a multi named output. +

    + A multi named output is an unbound set of files all sharing the same + OutputFormat, key class and value class configuration. +

    + When named outputs are used within a Mapper implementation, + key/values written to a name output are not part of the reduce phase, only + key/values written to the job OutputCollector are part of the + reduce phase. +

+ MultipleOutputs supports counters, which are disabled by default. The counters + group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_' and the multiname. +

    + Job configuration usage pattern is: +

    +
    + JobConf conf = new JobConf();
    +
    + conf.setInputPath(inDir);
    + FileOutputFormat.setOutputPath(conf, outDir);
    +
    + conf.setMapperClass(MOMap.class);
    + conf.setReducerClass(MOReduce.class);
    + ...
    +
    + // Defines additional single text based output 'text' for the job
    + MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
    + LongWritable.class, Text.class);
    +
    + // Defines additional multi sequencefile based output 'sequence' for the
    + // job
    + MultipleOutputs.addMultiNamedOutput(conf, "seq",
    +   SequenceFileOutputFormat.class,
    +   LongWritable.class, Text.class);
    + ...
    +
    + JobClient jc = new JobClient();
    + RunningJob job = jc.submitJob(conf);
    +
    + ...
    + 
    +

+ Usage pattern in the Reducer (or Mapper) is: +

    +
    + public class MOReduce implements
    +   Reducer<WritableComparable, Writable> {
    + private MultipleOutputs mos;
    +
    + public void configure(JobConf conf) {
    + ...
    + mos = new MultipleOutputs(conf);
    + }
    +
    + public void reduce(WritableComparable key, Iterator<Writable> values,
    + OutputCollector output, Reporter reporter)
    + throws IOException {
    + ...
    + mos.getCollector("text", reporter).collect(key, new Text("Hello"));
    + mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye"));
    + mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau"));
    + ...
    + }
    +
    + public void close() throws IOException {
    + mos.close();
    + ...
    + }
    +
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used instead of the default implementation, + @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + bound in order to improve throughput. +

    + Map implementations using this MapRunnable must be thread-safe. +

+ The Map-Reduce job has to be configured to use this MapRunnable class (using + the JobConf.setMapRunnerClass method) and + the number of threads the thread-pool can use with the + mapred.map.multithreadedrunner.threads property; its default + value is 10 threads. +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + pairs. Uses + {@link StringTokenizer} to break text into tokens.]]> + + + + + + + + + + + + total.order.partitioner.natural.order is not false, a trie + of the first total.order.partitioner.max.trie.depth(2) + 1 bytes + will be built. Otherwise, keys will be located using a binary search of + the partition keyset using the {@link org.apache.hadoop.io.RawComparator} + defined for this job. The input file must be sorted with the same + comparator and contain {@link + org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys.]]> + + + + + + + + + + + + R reduces, there are R-1 + keys in the SequenceFile.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + generateKeyValPairs(Object key, Object value); public void + configure(JobConfjob); } + + The package also provides a base class, ValueAggregatorBaseDescriptor, + implementing the above interface. The user can extend the base class and + implement generateKeyValPairs accordingly. + + The primary work of generateKeyValPairs is to emit one or more key/value + pairs based on the input key/value pair. The key in an output key/value pair + encode two pieces of information: aggregation type and aggregation id. The + value will be aggregated onto the aggregation id according the aggregation + type. + + This class offers a function to generate a map/reduce job using Aggregate + framework. The function takes the following parameters: input directory spec + input format (text or sequence file) output directory a file specifying the + user plugin class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The job can be configured using the static methods in this class, + {@link DBInputFormat}, and {@link DBOutputFormat}. +

    + Alternatively, the properties can be set in the configuration with proper + values. + + @see DBConfiguration#configureDB(JobConf, String, String, String, String) + @see DBInputFormat#setInput(JobConf, Class, String, String) + @see DBInputFormat#setInput(JobConf, Class, String, String, String, String...) + @see DBOutputFormat#setOutput(JobConf, String, String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 20070101 AND length > 0)' + @param orderBy the fieldNames in the orderBy clause. + @param fieldNames The field names in the table + @see #setInput(JobConf, Class, String, String)]]> + + + + + + + + + + + + + + DBInputFormat emits LongWritables containing the record number as + key and DBWritables as value. + + The SQL query, and input class can be using one of the two + setInput methods.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {@link DBOutputFormat} accepts <key,value> pairs, where + key has a type extending DBWritable. Returned {@link RecordWriter} + writes only the key to the database with a batch SQL query.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DBWritable. DBWritable, is similar to {@link Writable} + except that the {@link #write(PreparedStatement)} method takes a + {@link PreparedStatement}, and {@link #readFields(ResultSet)} + takes a {@link ResultSet}. +

    + Implementations are responsible for writing the fields of the object + to PreparedStatement, and reading the fields of the object from the + ResultSet. + +

    Example:

    + If we have the following table in the database: +
    + CREATE TABLE MyTable (
    +   counter        INTEGER NOT NULL,
    +   timestamp      BIGINT  NOT NULL
    + );
    + 
    + then we can read/write the tuples from/to the table with: +

    + public class MyWritable implements Writable, DBWritable {
    +   // Some data     
    +   private int counter;
    +   private long timestamp;
    +       
    +   //Writable#write() implementation
    +   public void write(DataOutput out) throws IOException {
    +     out.writeInt(counter);
    +     out.writeLong(timestamp);
    +   }
    +       
    +   //Writable#readFields() implementation
    +   public void readFields(DataInput in) throws IOException {
    +     counter = in.readInt();
    +     timestamp = in.readLong();
    +   }
    +       
    +   public void write(PreparedStatement statement) throws SQLException {
    +     statement.setInt(1, counter);
    +     statement.setLong(2, timestamp);
    +   }
    +       
    +   public void readFields(ResultSet resultSet) throws SQLException {
    +     counter = resultSet.getInt(1);
    +     timestamp = resultSet.getLong(2);
    +   } 
    + }
    + 

    ]]> +
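    To tie the pieces together, here is a hedged sketch of a job setup that reuses the MyWritable class from the example above; the JDBC driver class, connection URL, credentials and table names are placeholders, and the calls follow the configureDB/setInput/setOutput helpers referenced in the @see tags:

    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.lib.db.DBConfiguration;
    import org.apache.hadoop.mapred.lib.db.DBInputFormat;
    import org.apache.hadoop.mapred.lib.db.DBOutputFormat;

    public class MyTableJobSetup {
      public static void configure(JobConf job) {
        // Placeholder JDBC driver, connection URL and credentials.
        DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver",
            "jdbc:mysql://localhost/mydb", "user", "password");

        // Read MyTable, ordered by counter, into MyWritable values.
        job.setInputFormat(DBInputFormat.class);
        DBInputFormat.setInput(job, MyWritable.class, "MyTable",
            null /* conditions */, "counter", "counter", "timestamp");

        // Write results back to a second (placeholder) table with the same columns.
        job.setOutputFormat(DBOutputFormat.class);
        DBOutputFormat.setOutput(job, "MyTableCopy", "counter", "timestamp");
      }
    }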
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When constructing the instance, if the factory property + contextName.class exists, + its value is taken to be the name of the class to instantiate. Otherwise, + the default is to create an instance of + org.apache.hadoop.metrics.spi.NullContext, which is a + dummy "no-op" context which will cause all metric data to be discarded. + + @param contextName the name of the context + @return the named MetricsContext]]> + + + + + + + + + + + + + + When the instance is constructed, this method checks if the file + hadoop-metrics.properties exists on the class path. If it + exists, it must be in the format defined by java.util.Properties, and all + the properties in the file are set as attributes on the newly created + ContextFactory instance. + + @return the singleton ContextFactory instance]]> + + + + getFactory() method.]]> + + + + + + + + + + + + + + + + + + + startMonitoring() again after calling + this. + @see #close()]]> + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A record name identifies the kind of data to be reported. For example, a + program reporting statistics relating to the disks on a computer might use + a record name "diskStats".

    + + A record has zero or more tags. A tag has a name and a value. To + continue the example, the "diskStats" record might use a tag named + "diskName" to identify a particular disk. Sometimes it is useful to have + more than one tag, so there might also be a "diskType" with value "ide" or + "scsi" or whatever.

    + + A record also has zero or more metrics. These are the named + values that are to be reported to the metrics system. In the "diskStats" + example, possible metric names would be "diskPercentFull", "diskPercentBusy", + "kbReadPerSecond", etc.

    + + The general procedure for using a MetricsRecord is to fill in its tag and + metric values, and then call update() to pass the record to the + client library. + Metric data is not immediately sent to the metrics system + each time that update() is called. + An internal table is maintained, identified by the record name. This + table has columns + corresponding to the tag and the metric names, and rows + corresponding to each unique set of tag values. An update + either modifies an existing row in the table, or adds a new row with a set of + tag values that are different from all the other rows. Note that if there + are no tags, then there can be at most one row in the table.

    + + Once a row is added to the table, its data will be sent to the metrics system + on every timer period, whether or not it has been updated since the previous + timer period. If this is inappropriate, for example if metrics were being + reported by some transient object in an application, the remove() + method can be used to remove the row and thus stop the data from being + sent.

    + + Note that the update() method is atomic. This means that it is + safe for different threads to be updating the same metric. More precisely, + it is OK for different threads to call update() on MetricsRecord instances + with the same set of tag names and tag values. Different threads should + not use the same MetricsRecord instance at the same time.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MetricsContext.registerUpdater().]]> + + + + + + + + + + + + + + + + + + + + + + + + + fileName attribute, + if specified. Otherwise the data will be written to standard + output.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is configured by setting ContextFactory attributes which in turn + are usually configured through a properties file. All the attributes are + prefixed by the contextName. For example, the properties file might contain: +

    + myContextName.fileName=/tmp/metrics.log
    + myContextName.period=5
    + 
    ]]> +
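    A hedged sketch of the update pattern described above, assuming the org.apache.hadoop.metrics.MetricsUtil helper, a context named "myContextName" configured as in the properties fragment, and the diskStats record, tag and metric names from the earlier example:

    import org.apache.hadoop.metrics.MetricsContext;
    import org.apache.hadoop.metrics.MetricsRecord;
    import org.apache.hadoop.metrics.MetricsUtil;

    public class DiskStatsReporter {
      private final MetricsRecord diskStats;

      public DiskStatsReporter(String diskName) {
        // The context class and period come from the metrics properties file,
        // e.g. the myContextName.* keys shown above.
        MetricsContext context = MetricsUtil.getContext("myContextName");
        diskStats = MetricsUtil.createRecord(context, "diskStats");
        diskStats.setTag("diskName", diskName);
      }

      public void report(int percentFull, float kbReadPerSecond) {
        diskStats.setMetric("diskPercentFull", percentFull);
        diskStats.setMetric("kbReadPerSecond", kbReadPerSecond);
        // Buffers the values in the internal table; they are emitted on the
        // next timer period rather than immediately.
        diskStats.update();
      }
    }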
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + contextName.tableName. The returned map consists of + those attributes with the contextName and tableName stripped off.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class implements the internal table of metric data, and the timer + on which data is to be sent to the metrics system. Subclasses must + override the abstract emitRecord method in order to transmit + the data.

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + update + and remove().]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + ,name=" + Where the and are the supplied parameters + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.rpc.socket.factory.class.<ClassName>. When no + such parameter exists then fall back on the default socket factory as + configured by hadoop.rpc.socket.factory.class.default. If + this default socket factory is not configured, then fall back on the JVM + default socket factory. + + @param conf the configuration + @param clazz the class (usually a {@link VersionedProtocol}) + @return a socket factory]]> + + + + + + hadoop.rpc.socket.factory.default + + @param conf the configuration + @return the default socket factory as specified in the configuration or + the JVM default socket factory if the configuration does not + contain a default socket factory property.]]> + + + + + + + + + + + + + : + ://:/]]> + + + + + + + + : + ://:/]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + From documentation for {@link #getInputStream(Socket, long)}:
    + Returns InputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketInputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getInputStream()} is returned. In the latter + case, the timeout argument is ignored and the timeout set with + {@link Socket#setSoTimeout(int)} applies for reads.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see #getInputStream(Socket, long) + + @param socket + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + +
    + + From documentation for {@link #getOutputStream(Socket, long)}:
    + Returns OutputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketOutputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getOutputStream()} is returned. In the latter + case, the timeout argument is ignored and the write will wait until + data is available.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see #getOutputStream(Socket, long) + + @param socket + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
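    A short, hedged sketch of the intended usage; the host, port and 10-second timeouts are placeholders, and getDefaultSocketFactory is assumed to be the factory accessor being referred to:

    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.InetSocketAddress;
    import java.net.Socket;
    import javax.net.SocketFactory;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.net.NetUtils;

    public class SocketStreamExample {
      public static void talk(Configuration conf, String host, int port) throws Exception {
        SocketFactory factory = NetUtils.getDefaultSocketFactory(conf);
        Socket socket = factory.createSocket();
        socket.connect(new InetSocketAddress(host, port), 10000);
        // Use the NetUtils accessors instead of the raw Socket getters so that
        // channel-backed sockets honour the requested timeouts.
        InputStream in = NetUtils.getInputStream(socket, 10000);
        OutputStream out = NetUtils.getOutputStream(socket, 10000);
        try {
          out.write(42);
          out.flush();
          System.out.println(in.read());
        } finally {
          socket.close();
        }
      }
    }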
    + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + node + + @param node + a node + @return true if node is already in the tree; false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + scope + if scope starts with ~, choose one from the all nodes except for the + ones in scope; otherwise, choose one from scope + @param scope range of nodes from which a node will be choosen + @return the choosen node]]> + + + + + + + scope but not in excludedNodes + if scope starts with ~, return the number of nodes that are not + in scope and excludedNodes; + @param scope a path string that may start with ~ + @param excludedNodes a list of nodes + @return number of available nodes]]> + + + + + + + + + + + + reader + It linearly scans the array, if a local node is found, swap it with + the first element of the array. + If a local rack node is found, swap it with the first element following + the local node. + If neither local node or local rack node is found, put a random replica + location at postion 0. + It leaves the rest nodes untouched.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
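    The fragment above describes an in-memory network topology: adding nodes, membership tests, and random selection within a scope. A minimal sketch, assuming the NodeBase helper for leaf nodes and made-up host and rack names:

    import org.apache.hadoop.net.NetworkTopology;
    import org.apache.hadoop.net.Node;
    import org.apache.hadoop.net.NodeBase;

    public class TopologyExample {
      public static void main(String[] args) {
        NetworkTopology cluster = new NetworkTopology();

        // Leaf nodes are added under their rack location.
        Node host1 = new NodeBase("host1", "/rack1");
        Node host2 = new NodeBase("host2", "/rack2");
        cluster.add(host1);
        cluster.add(host2);

        System.out.println(cluster.contains(host1)); // true
        // Pick a random leaf from rack1 only.
        Node chosen = cluster.chooseRandom("/rack1");
        System.out.println(chosen.getName());
      }
    }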
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as an infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout in milliseconds; must not be negative. + @throws IOException]]> +
    +
    + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + +
    + + Create a new output stream with the given timeout. If the timeout + is zero, it will be treated as an infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketOutputStream#SocketOutputStream(WritableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout in milliseconds; must not be negative. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = getCount(). + @param newCapacity The new capacity in bytes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index idx = startVector(...); + while (!idx.done()) { + .... // read element of a vector + idx.incr(); + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This task takes the given record definition files and compiles them into + java or c++ + files. It is then up to the user to compile the generated files. + +

    The task requires the file attribute or the nested fileset element to be + specified. Optional attributes are language (sets the output + language; default is "java"), + destdir (name of the destination directory for generated java/c++ + code; default is ".") and failonerror (specifies error-handling + behavior; default is true). +

    Usage

    +
    + <recordcc
    +       destdir="${basedir}/gensrc"
    +       language="java">
    +   <fileset include="**\/*.jr" />
    + </recordcc>
    + 
    ]]> +
    +
    + +
    + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi as a comma separated string in + conf as a property attr + + The String starts with the user name followed by the default group names, + and other group names. + + @param conf configuration + @param attr property name + @param ugi a UnixUserGroupInformation]]> + + + + + + + + conf + + The object is expected to store with the property name attr + as a comma separated string that starts + with the user name followed by group names. + If the property name is not defined, return null. + It's assumed that there is only one UGI per user. If this user already + has a UGI in the ugi map, return the ugi in the map. + Otherwise, construct a UGI from the configuration, store it in the + ugi map and return it. + + @param conf configuration + @param attr property name + @return a UnixUGI + @throws LoginException if the stored string is ill-formatted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This tool supports archiving and anaylzing (sort/grep) of log-files. + It takes as input + a) Input uri which will serve uris of the logs to be archived. + b) Output directory (not mandatory). + b) Directory on dfs to archive the logs. + c) The sort/grep patterns for analyzing the files and separator for boundaries. + Usage: + Logalyzer -archive -archiveDir -analysis -logs -grep -sort -separator +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + in]]> + + + + + + + out.]]> + + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GenericOptionsParser to parse only the generic Hadoop + arguments. + + The array of string arguments other than the generic arguments can be + obtained by {@link #getRemainingArgs()}. + + @param conf the Configuration to modify. + @param args command-line arguments.]]> + + + + + GenericOptionsParser to parse given options as well + as generic Hadoop options. + + The resulting CommandLine object can be obtained by + {@link #getCommandLine()}. + + @param conf the configuration to modify + @param options options built by the caller + @param args User-specified arguments]]> + + + + + Strings containing the un-parsed arguments + or empty array if commandLine was not defined.]]> + + + + + CommandLine object + to process the parsed arguments. + + Note: If the object is created with + {@link #GenericOptionsParser(Configuration, String[])}, then returned + object will only contain parsed generic options. + + @return CommandLine representing list of arguments + parsed against Options descriptor.]]> + + + + + + + + + + + + + + + + + GenericOptionsParser is a utility to parse command line + arguments generic to the Hadoop framework. + + GenericOptionsParser recognizes several standarad command + line arguments, enabling applications to easily specify a namenode, a + jobtracker, additional configuration resources etc. + +

    Generic Options

    + +

    The supported generic options are:

    +

    +     -conf <configuration file>     specify a configuration file
    +     -D <property=value>            use value for given property
    +     -fs <local|namenode:port>      specify a namenode
    +     -jt <local|jobtracker:port>    specify a job tracker
    +     -files <comma separated list of files>    specify comma separated
    +                            files to be copied to the map reduce cluster
    +     -libjars <comma separated list of jars>   specify comma separated
    +                            jar files to include in the classpath.
    +     -archives <comma separated list of archives>    specify comma
    +             separated archives to be unarchived on the compute machines.
    +
    + 

    + +

    The general command line syntax is:

    +

    + bin/hadoop command [genericOptions] [commandOptions]
    + 

    + +

    Generic command line arguments might modify + Configuration objects, given to constructors.

    + +

    The functionality is implemented using Commons CLI.

    + +

    Examples:

    +

    + $ bin/hadoop dfs -fs darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    + 
    + $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    +     
    + $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
    + list /data directory in dfs with conf specified in hadoop-site.xml
    +     
    + $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt local -submit job.xml
    + submit a job to local runner
    + 
    + $ bin/hadoop jar -libjars testlib.jar 
    + -archives test.tgz -files file.txt inputjar args
    + job submission with libjars, files and archives
    + 

    + + @see Tool + @see ToolRunner]]> +
    +
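    Beyond the command-line examples, the parser can also be used directly, as described above. A minimal sketch; the property name printed at the end is only illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.util.GenericOptionsParser;

    public class MyTool {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Strips the generic options (-conf, -D, -fs, -jt, -files, -libjars,
        // -archives) from args and applies them to conf.
        GenericOptionsParser parser = new GenericOptionsParser(conf, args);
        String[] toolArgs = parser.getRemainingArgs();
        // toolArgs now holds only the application-specific arguments.
        System.out.println("remaining args: " + toolArgs.length);
        System.out.println("fs.default.name = " + conf.get("fs.default.name"));
      }
    }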
    + + + + + + + + + Class<T>) of the + argument of type T. + @param The type of the argument + @param t the object to get it class + @return Class<T>]]> + + + + + + + List<T> to a an array of + T[]. + @param c the Class object of the items in the list + @param list the list to convert]]> + + + + + + List<T> to a an array of + T[]. + @param list the list to convert + @throws ArrayIndexOutOfBoundsException if the list is empty. + Use {@link #toArray(Class, List)} if the list may be empty.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.file.buffer.size specified in the given + Configuration. + @param in input stream + @param conf configuration + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-hadoop is loaded, + else false]]> + + + + + + true if native hadoop libraries, if present, can be + used for this job; false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { pq.top().change(); pq.adjustTop(); } + instead of
    +  { o = pq.pop(); o.change(); pq.push(o); }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take an insignificant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occured and time-out the operation.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop Pipes + or Hadoop Streaming. + + It also checks to ensure that we are running on a *nix platform else + (e.g. in Cygwin/Windows) it returns null. + @param conf configuration + @return a String[] with the ulimit command arguments or + null if we are running on a non *nix platform or + if the limit is unspecified.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell can be used to run unix commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShellCommandExecutorshould be used in cases where the output + of the command needs no explicit parsing and where the command, working + directory and the environment remains unchanged. The output of the command + is stored as-is and is expected to be small.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ArrayList of string values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the char to be escaped + @return an escaped string]]> + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the escaped char + @return an unescaped string]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyApp.MyMapper.class);
    +         job.setReducerClass(MyApp.MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
    +         JobClient.runJob(job);
    +         return 0;
    +       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
    +         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> +
    +
    + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    diff --git a/aarch64/share/hadoop/common/jdiff/hadoop_0.19.1.xml b/aarch64/share/hadoop/common/jdiff/hadoop_0.19.1.xml new file mode 100644 index 0000000..92bdd2c --- /dev/null +++ b/aarch64/share/hadoop/common/jdiff/hadoop_0.19.1.xml @@ -0,0 +1,44195 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param in InputStream to deserialize the object from.]]> + + + + + + + + + + + name property, null if + no such property exists. + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + name property, without doing + variable expansion. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + + value of the name property. + + @param name property name. + @param value property value.]]> + + + + + + + name property. If no such property + exists, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, or if the specified value is not a valid + int, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as an int, + or defaultValue.]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property is specified, or if the specified value is not a valid + long, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property is specified, or if the specified value is not a valid + float, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a float, + or defaultValue.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

    + This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

      +
    1. hadoop-default.xml: Read-only defaults for hadoop.
    2. hadoop-site.xml: Site-specific configuration for a given hadoop installation.
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    +  <property>
    +    <name>dfs.client.buffer.dir</name>
    +    <value>/tmp/hadoop/dfs/client</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + hadoop-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
    1. Other properties defined in this Configuration; and, if a name is undefined here,
    2. Properties in {@link System#getProperties()}.
    + +

    For example, if a configuration resource contains the following property + definitions: +

    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    + + When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name.]]> +
    +
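    A minimal sketch of the accessors and resource loading described above; the extra resource path and the property names are placeholders:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;

    public class ConfExample {
      public static void main(String[] args) {
        // Loads hadoop-default.xml and hadoop-site.xml from the classpath.
        Configuration conf = new Configuration();
        // Additional resources are loaded after the defaults, in order.
        conf.addResource(new Path("/etc/myapp/myapp-site.xml"));

        // Variable expansion: a value such as ${basedir}/tmp is resolved on get().
        String tempdir = conf.get("tempdir", "/tmp");
        int bufferSize = conf.getInt("io.file.buffer.size", 4096);
        conf.setBoolean("myapp.verbose", true);

        System.out.println(tempdir + " " + bufferSize);
      }
    }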
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DistributedCache is a facility provided by the Map-Reduce + framework to cache files (text, archives, jars etc.) needed by applications. +

    + +

    Applications specify the files to be cached via urls (hdfs:// or http://) + in the {@link org.apache.hadoop.mapred.JobConf}. + The DistributedCache assumes that the + files specified via hdfs:// urls are already present on the + {@link FileSystem} at the path specified by the url.

    + +

    The framework will copy the necessary files on to the slave node before + any tasks for the job are executed on that node. Its efficiency stems from + the fact that the files are only copied once per job and the ability to + cache archives which are un-archived on the slaves.

    + +

    DistributedCache can be used to distribute simple, read-only + data/text files and/or more complex types such as archives, jars etc. + Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. + Jars may be optionally added to the classpath of the tasks, a rudimentary + software distribution mechanism. Files have execution permissions. + Optionally users can also direct it to symlink the distributed cache file(s) + into the working directory of the task.

    + +

    DistributedCache tracks modification timestamps of the cache + files. Clearly the cache files should not be modified by the application + or externally while the job is executing.

    + +

    Here is an illustrative example on how to use the + DistributedCache:

    +

    +     // Setting up the cache for the application
    +     
    +     1. Copy the requisite files to the FileSystem:
    +     
    +     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
    +     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
    +     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
    +     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
    +     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
    +     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
    +     
    +     2. Setup the application's JobConf:
    +     
    +     JobConf job = new JobConf();
    +     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
    +                                   job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
    +     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
    +     
    +     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
    +     or {@link org.apache.hadoop.mapred.Reducer}:
    +     
    +     public static class MapClass extends MapReduceBase  
    +     implements Mapper<K, V, K, V> {
    +     
    +       private Path[] localArchives;
    +       private Path[] localFiles;
    +       
    +       public void configure(JobConf job) {
    +         // Get the cached archives/files
    +         localArchives = DistributedCache.getLocalCacheArchives(job);
    +         localFiles = DistributedCache.getLocalCacheFiles(job);
    +       }
    +       
    +       public void map(K key, V value, 
    +                       OutputCollector<K, V> output, Reporter reporter) 
    +       throws IOException {
    +         // Use data from the cached archives/files here
    +         // ...
    +         // ...
    +         output.collect(k, v);
    +       }
    +     }
    +     
    + 

    + + @see org.apache.hadoop.mapred.JobConf + @see org.apache.hadoop.mapred.JobClient]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BufferedFSInputStream + with the specified buffer size, + and saves its argument, the input stream + in, for later use. An internal + buffer array of length size + is created and stored in buf. + + @param in the underlying input stream. + @param size the buffer size. + @exception IllegalArgumentException if size <= 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ' + @deprecated Consider using {@link GenericOptionsParser} instead.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    +
    +
    +

    +

    ? +
    Matches any single character. + +

    +

    * +
    Matches zero or more characters. + +

    +

    [abc] +
    Matches a single character from character set + {a,b,c}. + +

    +

    [a-b] +
    Matches a single character from the character range + {a...b}. Note that character a must be + lexicographically less than or equal to character b. + +

    +

    [^a] +
    Matches a single character that is not from character set or range + {a}. Note that the ^ character must occur + immediately to the right of the opening bracket. + +

    +

    \c +
    Removes (escapes) any special meaning of character c. + +

    +

    {ab,cd} +
    Matches a string from the string set {ab, cd} + +

    +

    {ab,c{de,fh}} +
    Matches a string from the string set {ab, cde, cfh} + +
    +
    +
    + + @param pathPattern a regular expression specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException]]> +
    +
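    A hedged sketch of using the pattern matching described above, assuming this is the FileSystem#globStatus accessor and using placeholder paths:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class GlobExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Match every part file under any 2014-02-* directory (placeholder paths).
        FileStatus[] matches = fs.globStatus(new Path("/logs/2014-02-*/part-*"));
        if (matches != null) {
          for (FileStatus status : matches) {
            System.out.println(status.getPath());
          }
        }
      }
    }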
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object. The + Hadoop DFS is a multi-machine system that appears as a single + disk. It's useful because of its fault tolerance and potentially + very large capacity. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + buf at offset + and checksum into checksum. + The method is used for implementing read, therefore, it should be optimized + for sequential reading + @param pos chunkPos + @param buf desitination buffer + @param offset offset in buf at which to store data + @param len maximun number of bytes to read + @return number of bytes read]]> + + + + + + + + + + + + + + + + + -1 if the end of the + stream is reached. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + This method implements the general contract of the corresponding + {@link InputStream#read(byte[], int, int) read} method of + the {@link InputStream} class. As an additional + convenience, it attempts to read as many bytes as possible by repeatedly + invoking the read method of the underlying stream. This + iterated read continues until one of the following + conditions becomes true:

      + +
    • The specified number of bytes have been read, + +
    • The read method of the underlying stream returns + -1, indicating end-of-file. + +
    If the first read on the underlying stream returns + -1 to indicate end-of-file then this method returns + -1. Otherwise this method returns the number of bytes + actually read. + + @param b destination buffer. + @param off offset at which to start storing bytes. + @param len maximum number of bytes to read. + @return the number of bytes read, or -1 if the end of + the stream has been reached. + @exception IOException if an I/O error occurs. + ChecksumException if any checksum error occurs]]> +
    + + + + + + + + + + + + + + + + + + n bytes of data from the + input stream. + +

    This method may skip more bytes than are remaining in the backing + file. This produces no exception and the number of bytes skipped + may include some number of bytes that were beyond the EOF of the + backing file. Attempting to read from the stream after skipping past + the end will result in -1 indicating the end of the file. + +

    If n is negative, no bytes are skipped. + + @param n the number of bytes to be skipped. + @return the actual number of bytes skipped. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to skip to is corrupted]]> + + + + + + + This method may seek past the end of the file. + This produces no exception and an attempt to read from + the stream will result in -1 indicating the end of the file. + + @param pos the postion to seek to. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to seek to is corrupted]]> + + + + + + + + + + len bytes from + stm + + @param stm an input stream + @param buf destiniation buffer + @param offset offset at which to store data + @param len number of bytes to read + @return actual number of bytes read + @throws IOException if there is any IO error]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + len bytes from the specified byte array + starting at offset off and generate a checksum for + each data chunk. + +

    This method stores bytes from the given array into this + stream's buffer before it gets checksumed. The buffer gets checksumed + and flushed to the underlying output stream when all data + in a checksum chunk are in the buffer. If the buffer is empty and + requested length is at least as large as the size of next checksum chunk + size, this method will checksum and write the chunk directly + to the underlying output stream. Thus it avoids uneccessary data copy. + + @param b the data. + @param off the start offset in the data. + @param len the number of bytes to write. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trash feature. Files are moved to a user's trash + directory, a subdirectory of their home directory named ".Trash". Files are + initially moved to a current sub-directory of the trash directory. + Within that sub-directory their original path is preserved. Periodically + one may checkpoint the current trash and remove older checkpoints. (This + design permits trash management without enumeration of the full trash + content, without date support in the filesystem, and without clock + synchronization.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a tool for migrating data from an older to a newer version + of an S3 filesystem. +

    +

    + All files in the filesystem are migrated by re-writing the block metadata + - no datafiles are touched. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + Extracts AWS credentials from the filesystem URI or configuration. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A block-based {@link FileSystem} backed by + Amazon S3. +

    + @see NativeS3FileSystem]]> +
    +
    + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If f is a file, this method will make a single call to S3. + If f is a directory, this method will make a maximum of + (n / 1000) + 2 calls to S3, where n is the total number of + files and directories contained directly in f. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} for reading and writing files stored on + Amazon S3. + Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation + stores files on S3 in their + native form so they can be read by other S3 tools. +

    + @see org.apache.hadoop.fs.s3.S3FileSystem]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . + @param name The name of the server + @param port The port to use on the server + @param findPort whether the server should start at the given port and + increment by 1 until it finds a free port. + @param conf Configuration]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + points to the log directory + "/static/" -> points to common static files (src/webapps/static) + "/" -> the jsp server code from (src/webapps/)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nth value.]]> + + + + + + + + + + + + + + + + + + + + + nth value in the file.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataInputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + DataInputBuffer buffer = new DataInputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using DataInput methods ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataOutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + DataOutputBuffer buffer = new DataOutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using DataOutput methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    ]]> +
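Combining the two reset-and-reuse patterns above, a minimal runnable sketch (class and value choices are arbitrary) that serializes an IntWritable into a reusable DataOutputBuffer and reads it back through a reusable DataInputBuffer:

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;

public class BufferRoundTrip {
  public static void main(String[] args) throws Exception {
    // Create the buffers once and reset them per record, as recommended above.
    DataOutputBuffer out = new DataOutputBuffer();
    DataInputBuffer in = new DataInputBuffer();
    IntWritable value = new IntWritable();

    for (int i = 0; i < 3; i++) {
      out.reset();
      value.set(i);
      value.write(out);                          // serialize into the output buffer
      in.reset(out.getData(), out.getLength());  // point the input buffer at the bytes
      IntWritable copy = new IntWritable();
      copy.readFields(in);                       // deserialize
      System.out.println(copy.get());
    }
  }
}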
    +
    + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

+ Compared with ObjectWritable, this class is much more efficient, + because ObjectWritable appends the class declaration as a String + to the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines + the classes that will be wrapped in GenericObject in the application. + Attention: the classes defined in the getTypes() method must + implement the Writable interface (a runnable sketch follows the skeleton below). +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
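A minimal, self-contained sketch of the pattern above, assuming IntWritable and Text as the wrapped types (the ValueUnion name is hypothetical); it wraps a Text value, serializes it through a DataOutputBuffer, and unwraps it again:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.GenericWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class GenericWritableExample {

  // Hypothetical wrapper declaring the concrete Writable types it may carry.
  public static class ValueUnion extends GenericWritable {
    @SuppressWarnings("unchecked")
    private static final Class<? extends Writable>[] CLASSES =
        new Class[] { IntWritable.class, Text.class };

    protected Class<? extends Writable>[] getTypes() {
      return CLASSES;
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    ValueUnion wrapped = new ValueUnion();
    wrapped.setConf(conf);            // the framework normally passes the configuration
    wrapped.set(new Text("hello"));   // wrap one of the declared types

    DataOutputBuffer out = new DataOutputBuffer();
    wrapped.write(out);               // writes a one-byte type index plus the payload

    ValueUnion read = new ValueUnion();
    read.setConf(conf);
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    read.readFields(in);
    System.out.println(read.get());   // prints "hello"
  }
}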
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new InputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + InputBuffer buffer = new InputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using InputStream methods ...
    + }
    + 
    + @see DataInputBuffer + @see DataOutput]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link IOException} or + null pointers. Must only be used for cleanup in exception handlers. + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key and + val. Returns true if such a pair exists and false when at + the end of the map]]> + + + + + + + + + + + + + + + + key or if it does not exist, at the first entry + after the named key. + +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof.]]> + + + + + + + + + key does not exist, return + the first entry that falls just before the key. Otherwise, + return the record that sorts just after. + @return - the key that was the closest match or null if eof.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new OutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + OutputBuffer buffer = new OutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using OutputStream methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    + @see DataOutputBuffer + @see InputBuffer]]> +
    +
    + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link Writer}, {@link Reader} and + {@link Sorter} classes for writing, reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
    1. + Writer : Uncompressed records. +
    2. +
    3. + RecordCompressWriter : Record-compressed files, only compress + values. +
    4. +
    5. + BlockCompressWriter : Block-compressed files, both keys & + values are collected in 'blocks' + separately and compressed. The size of + the 'block' is configurable. +
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

The recommended way is to use the static createWriter methods + provided by SequenceFile to choose the preferred format; a short usage sketch follows the format description below.

    + +

    The {@link Reader} acts as the bridge and can read any of the above + SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
    • + version - 3 bytes of magic header SEQ, followed by 1 byte of actual + version number (e.g. SEQ4 or SEQ6) +
    • +
    • + keyClassName -key class +
    • +
    • + valueClassName - value class +
    • +
    • + compression - A boolean which specifies if compression is turned on for + keys/values in this file. +
    • +
    • + blockCompression - A boolean which specifies if block-compression is + turned on for keys/values in this file. +
    • +
    • + compression codec - CompressionCodec class which is used for + compression of keys and/or values (if compression is + enabled). +
    • +
    • + metadata - {@link Metadata} for this file. +
    • +
    • + sync - A sync marker to denote end of the header. +
    • +
    + +
    Uncompressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Value
      • +
      +
    • +
• + A sync-marker every few hundred bytes or so. +
    • +
    + +
    Record-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Compressed Value
      • +
      +
    • +
• + A sync-marker every few hundred bytes or so. +
    • +
    + +
    Block-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record Block +
        +
      • Compressed key-lengths block-size
      • +
      • Compressed key-lengths block
      • +
      • Compressed keys block-size
      • +
      • Compressed keys block
      • +
      • Compressed value-lengths block-size
      • +
      • Compressed value-lengths block
      • +
      • Compressed values block-size
      • +
      • Compressed values block
      • +
      +
    • +
• + A sync-marker every few hundred bytes or so. +
    • +
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
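To make the createWriter/Reader flow above concrete, a minimal sketch (the path and key/value choices are arbitrary) that writes a few record-compressed pairs and reads them back; the Reader detects the format from the file header:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp/example.seq");   // arbitrary location

    // createWriter picks the Writer implementation for the requested CompressionType.
    SequenceFile.Writer writer = SequenceFile.createWriter(
        fs, conf, path, IntWritable.class, Text.class,
        SequenceFile.CompressionType.RECORD);
    try {
      for (int i = 0; i < 3; i++) {
        writer.append(new IntWritable(i), new Text("value-" + i));
      }
    } finally {
      writer.close();
    }

    // The Reader acts as the bridge and handles any of the three formats.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    try {
      IntWritable key = new IntWritable();
      Text value = new Text();
      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
      }
    } finally {
      reader.close();
    }
  }
}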
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key, skipping its + value. True if another entry exists, and false at end of file.]]> + + + + + + + + key and + val. Returns true if such a pair exists and false when at + end of file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The position passed must be a position returned by {@link + SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + position, use {@link SequenceFile.Reader#sync(long)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SegmentDescriptor + @param segments the list of SegmentDescriptors + @param tmpDir the directory to write temporary files into + @return RawKeyValueIterator + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For best performance, applications should make sure that the {@link + Writable#readFields(DataInput)} implementation of their keys is + very efficient. In particular, it should avoid allocating memory.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This always returns a synchronized position. In other words, + immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + the key may be earlier in the file than key last written when this + method was called (e.g., with block-compression, it may be the first key + in the block that was being written when this method was called).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key. Returns + true if such a key exists and false when at the end of the set.]]> + + + + + + + key. + Returns key, or null if no match exists.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instatiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. 
+ @return byte position of the first occurence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a UTF8 with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + Also includes utilities for efficiently reading and writing UTF-8. + + @deprecated replaced by Text]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

+ + @param in DataInput to deserialize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +

    +     public class MyWritable implements Writable {
    +       // Some data     
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 

    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Example:

    +

    +     public class MyWritableComparable implements WritableComparable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable w) {
+         int thisValue = this.counter;
+         int thatValue = w.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +     }
    + 

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implemenation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. 
+ @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-lzo library is loaded & initialized; + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lzo compression/decompression pair. + http://www.oberhumer.com/opensource/lzo/]]> + + + + + + + + + + + + + + + + + + + + + + + lzo compression/decompression pair compatible with lzop. + http://www.lzop.org/]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FIXME: This array should be in a private or package private location, + since it could be modified by malicious code. +

    ]]> +
    + + + + This interface is public for historical purposes. You should have no need to + use it. +

    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Although BZip2 headers are marked with the magic "Bz" this + constructor expects the next byte in the stream to be the first one after + the magic. Thus callers have to skip the first two bytes. Otherwise this + constructor will throw an exception. +

    + + @throws IOException + if the stream content is malformed or an I/O error occurs. + @throws NullPointerException + if in == null]]> +
    +
    + + + + + + + + + + + + + + + The decompression requires large amounts of memory. Thus you should call the + {@link #close() close()} method as soon as possible, to force + CBZip2InputStream to release the allocated memory. See + {@link CBZip2OutputStream CBZip2OutputStream} for information about memory + usage. +

    + +

+ CBZip2InputStream reads bytes from the compressed source stream via + the single byte {@link java.io.InputStream#read() read()} method exclusively. + Thus you should consider using a buffered source stream. +

    + +

    + Instances of this class are not threadsafe. +

    ]]> +
    +
    + + + + + + + + CBZip2OutputStream with a blocksize of 900k. + +

+ Attention: The caller is responsible for writing the two BZip2 magic + bytes "BZ" to the specified stream prior to calling this + constructor. +

    + + @param out * + the destination stream. + + @throws IOException + if an I/O error occurs in the specified stream. + @throws NullPointerException + if out == null.]]> +
    +
    + + + + CBZip2OutputStream with specified blocksize. + +

+ Attention: The caller is responsible for writing the two BZip2 magic + bytes "BZ" to the specified stream prior to calling this + constructor. +

    + + + @param out + the destination stream. + @param blockSize + the blockSize as 100k units. + + @throws IOException + if an I/O error occurs in the specified stream. + @throws IllegalArgumentException + if (blockSize < 1) || (blockSize > 9). + @throws NullPointerException + if out == null. + + @see #MIN_BLOCKSIZE + @see #MAX_BLOCKSIZE]]> +
    +
    + + + + + + + + + + + + + inputLength this method returns MAX_BLOCKSIZE + always. + + @param inputLength + The length of the data which will be compressed by + CBZip2OutputStream.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + == 1.]]> + + + + + == 9.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If you are ever unlucky/improbable enough to get a stack overflow whilst + sorting, increase the following constant and try again. In practice I + have never seen the stack go above 27 elems, so the following limit seems + very generous. +

    ]]> +
    +
    + + + The compression requires large amounts of memory. Thus you should call the + {@link #close() close()} method as soon as possible, to force + CBZip2OutputStream to release the allocated memory. +

    + +

    + You can shrink the amount of allocated memory and maybe raise the compression + speed by choosing a lower blocksize, which in turn may cause a lower + compression ratio. You can avoid unnecessary memory allocation by avoiding + using a blocksize which is bigger than the size of the input. +

    + +

    + You can compute the memory usage for compressing by the following formula: +

    + +
    + <code>400k + (9 * blocksize)</code>.
    + 
    + +

    + To get the memory required for decompression by {@link CBZip2InputStream + CBZip2InputStream} use +

    + +
    + <code>65k + (5 * blocksize)</code>.
    + 
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Memory usage by blocksize

Blocksize   Compression memory usage   Decompression memory usage
100k        1300k                      565k
200k        2200k                      1065k
300k        3100k                      1565k
400k        4000k                      2065k
500k        4900k                      2565k
600k        5800k                      3065k
700k        6700k                      3565k
800k        7600k                      4065k
900k        8500k                      4565k
    + +

    + For decompression CBZip2InputStream allocates less memory if the + bzipped input is smaller than one block. +

    + +

    + Instances of this class are not threadsafe. +

    + +

    + TODO: Update to BZip2 1.0.1 +

    ]]> +
    +
    + +
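A minimal sketch of the magic-byte handling described above, using in-memory streams and arbitrary data: the caller writes "BZ" before constructing CBZip2OutputStream and skips it again before constructing CBZip2InputStream (the higher-level BZip2Codec normally takes care of this detail):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.compress.bzip2.CBZip2InputStream;
import org.apache.hadoop.io.compress.bzip2.CBZip2OutputStream;

public class BZip2MagicExample {
  public static void main(String[] args) throws IOException {
    byte[] original = "hello bzip2".getBytes("UTF-8");

    // Compress: write the two magic bytes before wrapping the stream.
    ByteArrayOutputStream compressed = new ByteArrayOutputStream();
    compressed.write('B');
    compressed.write('Z');
    CBZip2OutputStream bzOut = new CBZip2OutputStream(compressed);
    bzOut.write(original);
    bzOut.close();

    // Decompress: skip the two magic bytes before handing over the stream.
    ByteArrayInputStream in = new ByteArrayInputStream(compressed.toByteArray());
    in.read();  // 'B'
    in.read();  // 'Z'
    CBZip2InputStream bzIn = new CBZip2InputStream(in);
    ByteArrayOutputStream restored = new ByteArrayOutputStream();
    int b;
    while ((b = bzIn.read()) != -1) {
      restored.write(b);
    }
    bzIn.close();
    System.out.println(restored.toString("UTF-8"));
  }
}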
    + + + + + + + + + + + + + + + + + true if lzo compressors are loaded & initialized, + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if lzo decompressors are loaded & initialized, + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-zlib is loaded & initialized + and can be loaded for this job, else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Keep trying a limited number of times, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
    + + + + + + + Keep trying for a maximum time, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
    + + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime mutliplied by the number of tries so far. +

    ]]> +
    +
    + + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime mutliplied by a random + number in the range of [0, 2 to the number of retries) +

    ]]> +
    +
    + + + + + + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + + + A retry policy for RemoteException + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + Try once, and fail by re-throwing the exception. + This corresponds to having no retry mechanism in place. +

    ]]> +
    +
    + + + + Try once, and fail silently for void methods, or by + re-throwing the exception for non-void methods. +

    ]]> +
    +
    + + + + Keep trying forever. +

    ]]> +
    +
    + + + A collection of useful implementations of {@link RetryPolicy}. +

    ]]> +
    +
    + + + + + + + + + + Determines whether the framework should retry a + method for the given exception, and the number + of retries that have been made for that operation + so far. +

    + @param e The exception that caused the method to fail. + @param retries The number of times the method has been retried. + @return true if the method should be retried, + false if the method should not be retried + but shouldn't fail with an exception (only for void methods). + @throws Exception The re-thrown exception e indicating + that the method failed and should not be retried further.]]> +
    +
    + + + Specifies a policy for retrying method failures. + Implementations of this interface should be immutable. +

    ]]> +
    +
    + + + + + + + + + + + + Create a proxy for an interface of an implementation class + using the same retry policy for each method in the interface. +

    + @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param retryPolicy the policy for retirying method call failures + @return the retry proxy]]> +
    +
    + + + + + + + Create a proxy for an interface of an implementation class + using the a set of retry policies specified by method name. + If no retry policy is defined for a method then a default of + {@link RetryPolicies#TRY_ONCE_THEN_FAIL} is used. +

    + @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param methodNameToPolicyMap a map of method names to retry policies + @return the retry proxy]]> +
    +
    + + + A factory for creating retry proxies. +

    ]]> +
    +
    + +
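Putting the retry pieces above together, a minimal sketch (the FlakyService interface is hypothetical; any interface whose methods declare IOException works) that wraps an implementation with a fixed-sleep policy via RetryProxy:

import java.io.IOException;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryProxy;

public class RetryProxyExample {
  // Hypothetical interface standing in for a real client-side protocol.
  public interface FlakyService {
    String fetch(String key) throws IOException;
  }

  public static FlakyService withRetries(FlakyService raw) {
    // Retry each failed call up to 5 times, sleeping 2 seconds between attempts.
    RetryPolicy policy =
        RetryPolicies.retryUpToMaximumCountWithFixedSleep(5, 2, TimeUnit.SECONDS);
    return (FlakyService) RetryProxy.create(FlakyService.class, raw, policy);
  }
}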
    + + + + + + + + Prepare the deserializer for reading.

    ]]> +
    +
    + + + + + + Deserialize the next object from the underlying input stream. + If the object t is non-null then this deserializer + may set its internal state to the next object read from the input + stream. Otherwise, if the object t is null a new + deserialized object will be created. +

    + @return the deserialized object]]> +
    +
    + + + + Close the underlying input stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for deserializing objects of type from an + {@link InputStream}. +

    + +

    + Deserializers are stateful, but must not buffer the input since + other producers may read from the input between calls to + {@link #deserialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link Deserializer} to deserialize + the objects to be compared so that the standard {@link Comparator} can + be used to compare them. +

    +

    + One may optimize compare-intensive operations by using a custom + implementation of {@link RawComparator} that operates directly + on byte representations. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + Encapsulates a {@link Serializer}/{@link Deserializer} pair. +

    + @param ]]> +
    +
    + + + + + + + Serializations are found by reading the io.serializations + property from conf, which is a comma-delimited list of + classnames. +

    ]]> +
    +
    + + + + + + + + + + + + A factory for {@link Serialization}s. +

    ]]> +
    +
    + + + + + + + + Prepare the serializer for writing.

    ]]> +
    +
    + + + + + Serialize t to the underlying output stream.

    ]]> +
    +
    + + + + Close the underlying output stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for serializing objects of type to an + {@link OutputStream}. +

    + +

    + Serializers are stateful, but must not buffer the output since + other producers may write to the output between calls to + {@link #serialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + +
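As a concrete illustration of the Serializer/Serialization plumbing described above, a minimal sketch (helper class and method names are arbitrary) that asks a SerializationFactory, driven by the io.serializations property, for a Serializer and writes an IntWritable to a byte array:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;

public class SerializationFactoryExample {
  public static byte[] toBytes(IntWritable value) throws IOException {
    Configuration conf = new Configuration();
    // The factory consults io.serializations to find a Serialization that
    // accepts IntWritable (the Writable-based one, in the default configuration).
    SerializationFactory factory = new SerializationFactory(conf);
    Serializer<IntWritable> serializer = factory.getSerializer(IntWritable.class);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    serializer.open(out);          // prepare the serializer for writing
    serializer.serialize(value);   // serialize to the underlying stream
    serializer.close();            // close the stream and release resources
    return out.toByteArray();
  }
}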
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + param, to the IPC server running at + address, returning the value. Throws exceptions if there are + network problems or if the remote code threw an exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unwraps any IOException. + + @param lookupTypes the desired exception class. + @return IOException, which is either the lookupClass exception or this.]]> + + + + + This unwraps any Throwable that has a constructor taking + a String as a parameter. + Otherwise it returns this. + + @return Throwable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + protocol is a Java interface. All parameters and return types must + be one of: + +
    • a primitive type, boolean, byte, + char, short, int, long, + float, double, or void; or
    • + +
    • a {@link String}; or
    • + +
    • a {@link Writable}; or
    • + +
    • an array of the above types
    + + All methods in the protocol should throw only IOException. No field data of + the protocol instance is transmitted.]]> +
    +
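For illustration only, a hypothetical protocol interface following the rules listed above: parameters and return types are primitives, Strings, Writables, or arrays of those, and every method declares only IOException (Hadoop protocol interfaces conventionally also carry a versionID field):

import java.io.IOException;
import org.apache.hadoop.io.Text;

public interface EchoProtocol {
  // Conventional version constant used when obtaining an RPC proxy.
  long versionID = 1L;

  Text echo(Text message) throws IOException;

  long add(long a, long b) throws IOException;
}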
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + handlerCount determines + the number of handler threads that will be used to process calls.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #rpcQueueTime}.inc(time)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        rpc.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker, + as {@link JobTracker.State} + + @return the current state of the JobTracker.]]> + + + + + + + + + + + + ClusterStatus provides clients with information such as: +

      +
    1. + Size of the cluster. +
    2. +
    3. + Task capacity of the cluster. +
    4. +
    5. + The number of currently running map & reduce tasks. +
    6. +
    7. + State of the JobTracker. +
    8. +

    + +

    Clients can query for the latest ClusterStatus, via + {@link JobClient#getClusterStatus()}.

    + + @see JobClient]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter can be of + any {@link Enum} type.

    + +

    Counters are bunched into {@link Group}s, each comprising of + counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Group of counters, comprising of counters from a particular + counter {@link Enum} class. + +

Group handles localization of the class name and the + counter names.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param fs the file system that the file is on + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobConf, int)}. + Subclasses of FileInputFormat can also override the + {@link #isSplitable(FileSystem, Path)} method to ensure input-files are + not split-up and are processed as a whole by {@link Mapper}s.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

Note: The following is valid only if the {@link OutputCommitter} + is {@link FileOutputCommitter}. If OutputCommitter is not + a FileOutputCommitter, the task's temporary output + directory is the same as {@link #getOutputPath(JobConf)}, i.e. + ${mapred.output.dir}

    + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapred.output.dir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapred.output.dir}/_temporary/_${taskid} (only) + are promoted to ${mapred.output.dir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

The application-writer can take advantage of this by creating any + side-files required in ${mapred.work.output.dir} during execution + of a reduce-task, i.e. via {@link #getWorkOutputPath(JobConf)}, and the + framework will move them out similarly - thus the application-writer doesn't + have to pick unique paths per task-attempt.

    + +

    Note: the value of ${mapred.work.output.dir} during + execution of a particular task-attempt is actually + ${mapred.output.dir}/_temporary/_{$taskid}, and this value is + set by the map-reduce framework. So, just create any side-files in the + path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce + task to take advantage of this feature.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
    +
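A minimal sketch of the side-file pattern described above (the helper name is arbitrary, and it is meant to be called from within a running task): open a file under the task's work output directory via getWorkOutputPath and let the framework promote it with the rest of the task output on success:

import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class SideFileExample {
  // Create a side-file in the task's work output directory; the framework
  // moves it to ${mapred.output.dir} if the task-attempt succeeds.
  public static FSDataOutputStream openSideFile(JobConf job, String name)
      throws IOException {
    Path workDir = FileOutputFormat.getWorkOutputPath(job);
    FileSystem fs = workDir.getFileSystem(job);
    return fs.create(new Path(workDir, name));
  }
}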
    + + + + + + + + + + + + + The generated name can be used to create custom files from within the + different tasks for the job, the names for different tasks will not collide + with each other.

    + +

The given name is postfixed with the task type, 'm' for maps, 'r' for + reduces, and the task partition number. For example, given the name 'test' + running on the first map of the job, the generated name will be + 'test-m-00000'.

+ + @param conf the configuration for the job. + @param name the name to make unique. + @return a unique name across all tasks of the job.]]> +
    +
    + + + + + The path can be used to create custom files from within the map and + reduce tasks. The path name will be unique for each task. The path parent + will be the job output directory.

+ +

    This method uses the {@link #getUniqueName} method to make the file name + unique for the task.

+ + @param conf the configuration for the job. + @param name the name for the file. + @return a unique path across all tasks of the job.]]> +
    +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For example, a split could + be a <input-file-path, start, offset> tuple. + + @param job job configuration. + @param numSplits the desired number of splits, a hint. + @return an array of {@link InputSplit}s for the job.]]> +

    + + @param split the {@link InputSplit} + @param job the job that this split belongs to + @return a {@link RecordReader}]]> +
    +
    + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
    1. + Validate the input-specification of the job. +
    2. + Split-up the input file(s) into logical {@link InputSplit}s, each of + which is then assigned to an individual {@link Mapper}. +
    3. +
    4. + Provide the {@link RecordReader} implementation to be used to glean + input records from the logical InputSplit for processing by + the {@link Mapper}. +
    5. +
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

Clearly, logical splits based on input-size are insufficient for many + applications since record boundaries must be respected. In such cases, the + application also has to implement a {@link RecordReader}, which carries the + responsibility to respect record-boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see JobClient + @see FileInputFormat]]> +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + jobid doesn't correspond to any known job. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient is the primary interface for the user-job to interact + with the {@link JobTracker}. + + JobClient provides facilities to submit jobs, track their + progress, access component-tasks' reports/logs, get the Map-Reduce cluster + status information etc. + +

    The job submission process involves: +

      +
    1. + Checking the input and output specifications of the job. +
    2. +
    3. + Computing the {@link InputSplit}s for the job. +
    4. +
    5. + Setup the requisite accounting information for the {@link DistributedCache} + of the job, if necessary. +
    6. +
    7. + Copying the job's jar and configuration to the map-reduce system directory + on the distributed file-system. +
    8. +
9. + Submitting the job to the JobTracker and optionally monitoring + its status. +
    10. +

    + + Normally the user creates the application, describes various facets of the + job via {@link JobConf} and then uses the JobClient to submit + the job and monitor its progress. + +

    Here is an example on how to use JobClient:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     job.setInputPath(new Path("in"));
    +     job.setOutputPath(new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +
    +     // Submit the job, then poll for progress until the job is complete
    +     JobClient.runJob(job);
    + 

    + +

    Job Control

    + +

At times clients would chain map-reduce jobs to accomplish complex tasks + which cannot be done via a single map-reduce job. This is fairly easy since + the output of the job, typically, goes to the distributed file-system and that + can be used as the input for the next job.

    + +

However, this also means that the onus of ensuring jobs are complete + (success/failure) lies squarely on the clients. In such situations the + various job-control options are: +

      +
    1. + {@link #runJob(JobConf)} : submits the job and returns only after + the job has completed. +
    2. +
3. + {@link #submitJob(JobConf)} : only submits the job, then polls the + returned handle to the {@link RunningJob} to query status and make + scheduling decisions (see the sketch below). +
    4. +
5. + {@link JobConf#setJobEndNotificationURI(String)} : sets up a notification + on job-completion, thus avoiding polling. +
    6. +

    + + @see JobConf + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
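A short, hedged sketch of the second option (submit and poll), assuming the JobConf job has already been fully configured; the five-second polling interval and the error handling are illustrative only:

+     // Submit asynchronously and poll the returned RunningJob handle.
+     JobClient client = new JobClient(job);
+     RunningJob running = client.submitJob(job);
+     while (!running.isComplete()) {
+       Thread.sleep(5000);     // illustrative polling interval; handle InterruptedException as needed
+     }
+     if (!running.isSuccessful()) {
+       throw new IOException("Job failed: " + running.getID());
+     }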
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If the parameter {@code loadDefaults} is false, the new instance + will not load resources from the default files. + + @param loadDefaults specifies whether to load from the default files]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if framework should keep the intermediate files + for failed tasks, false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the outputs of the maps are to be compressed, + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This comparator should be provided if the equivalence rules for keys + for sorting the intermediates are different from those for grouping keys + before each call to + {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.

    + +

    For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed + in a single call to the reduce function if K1 and K2 compare as equal.

    + +

    Since {@link #setOutputKeyComparatorClass(Class)} can be used to control + how keys are sorted, this can be used in conjunction to simulate + secondary sort on values.

    + +

    Note: This is not a guarantee of the reduce sort being + stable in any sense. (In any case, with the order of available + map-outputs to the reduce being non-deterministic, it wouldn't make + that much sense.)

    + + @param theClass the comparator class to be used for grouping keys. + It should implement RawComparator. + @see #setOutputKeyComparatorClass(Class)]]> +
    +
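A hedged illustration of the secondary-sort wiring described above; FullKeyComparator and PrimaryKeyGroupingComparator are hypothetical user-supplied RawComparator implementations, not part of the API:

+     // Sort by the full composite key, but group reduce() calls by its primary part only.
+     job.setOutputKeyComparatorClass(FullKeyComparator.class);                 // hypothetical comparator
+     job.setOutputValueGroupingComparator(PrimaryKeyGroupingComparator.class); // hypothetical comparator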
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. Typically the combiner is same as the + the {@link Reducer} for the job i.e. {@link #getReducerClass()}. + + @return the user-defined combiner class used to combine map-outputs.]]> + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. + +

    The combiner is a task-level aggregation operation which, in some cases, + helps to cut down the amount of data transferred from the {@link Mapper} to + the {@link Reducer}, leading to better performance.

    + +

Typically the combiner is the same as the Reducer for the + job i.e. {@link #setReducerClass(Class)}.

    + + @param theClass the user-defined combiner class used to combine + map-outputs.]]> +
    +
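For example, a word-count style job would typically reuse its Reducer as the combiner; MyJob.MyReducer is the hypothetical reducer class used in the other examples in this document:

+     // Map-side (local) aggregation using the same reduce logic.
+     job.setCombinerClass(MyJob.MyReducer.class);
+     job.setReducerClass(MyJob.MyReducer.class);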
    + + + true. + + @return true if speculative execution be used for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on, else false.]]> + + + + + true. + + @return true if speculative execution be + used for this job for map tasks, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for map tasks, + else false.]]> + + + + + true. + + @return true if speculative execution be used + for reduce tasks for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for reduce tasks, + else false.]]> + + + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + Note: This is only a hint to the framework. The actual + number of spawned map tasks depends on the number of {@link InputSplit}s + generated by the job's {@link InputFormat#getSplits(JobConf, int)}. + + A custom {@link InputFormat} is typically used to accurately control + the number of map tasks for the job.

    + +

    How many maps?

    + +

    The number of maps is usually driven by the total size of the inputs + i.e. total number of blocks of the input files.

    + +

The right level of parallelism for maps seems to be around 10-100 maps + per-node, although it has been set up to 300 or so for very cpu-light map + tasks. Task setup takes a while, so it is best if the maps take at least a + minute to execute.

    + +

    The default behavior of file-based {@link InputFormat}s is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of input files. However, the {@link FileSystem} blocksize of the + input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

    Thus, if you expect 10TB of input data and have a blocksize of 128MB, + you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is + used to set it even higher.

    + + @param n the number of map tasks for this job. + @see InputFormat#getSplits(JobConf, int) + @see FileInputFormat + @see FileSystem#getDefaultBlockSize() + @see FileStatus#getBlockSize()]]> +
    +
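As a worked version of the figure above: 10 TB / 128 MB is 81,920 splits, i.e. roughly the quoted 82,000 maps. If even more map parallelism were wanted, the hint could be raised explicitly; the value below is purely illustrative:

+     // 10 TB of input at a 128 MB block size yields about 81,920 splits by default.
+     // The value below is only a hint; InputFormat.getSplits() has the final say.
+     job.setNumMapTasks(100000);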
    + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + How many reduces? + +

    The right number of reduces seems to be 0.95 or + 1.75 multiplied by (<no. of nodes> * + + mapred.tasktracker.reduce.tasks.maximum). +

    + +

With 0.95 all of the reduces can launch immediately and + start transferring map outputs as the maps finish. With 1.75 + the faster nodes will finish their first round of reduces and launch a + second wave of reduces doing a much better job of load balancing.

    + +

    Increasing the number of reduces increases the framework overhead, but + increases load balancing and lowers the cost of failures.

    + +

    The scaling factors above are slightly less than whole numbers to + reserve a few reduce slots in the framework for speculative-tasks, failures + etc.

    + +

    Reducer NONE

    + +

    It is legal to set the number of reduce-tasks to zero.

    + +

In this case the output of the map-tasks goes directly to the distributed + file-system, to the path set by + {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the + framework doesn't sort the map-outputs before writing them out to HDFS.

    + + @param n the number of reduce tasks for this job.]]> +
    +
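A hedged sketch of applying the 0.95 factor above, assuming the cluster-wide reduce-slot count reported by ClusterStatus is an acceptable stand-in for <no. of nodes> * mapred.tasktracker.reduce.tasks.maximum:

+     // Size the reduce phase to roughly 95% of the cluster's reduce slots.
+     ClusterStatus cluster = new JobClient(job).getClusterStatus();
+     job.setNumReduceTasks((int) (0.95 * cluster.getMaxReduceTasks()));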
    + + + mapred.map.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per map task.]]> + + + + + + + + + + + mapred.reduce.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per reduce task.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + noFailures, the + tasktracker is blacklisted for this job. + + @param noFailures maximum no. of failures of a given job per tasktracker.]]> + + + + + blacklisted for this job. + + @return the maximum no. of failures of a given job per tasktracker.]]> + + + + + failed. + + Defaults to zero, i.e. any failed map-task results in + the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + failed. + + Defaults to zero, i.e. any failed reduce-task results + in the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The debug script can aid debugging of failed map tasks. The script is + given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the map failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setMapDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param mDbgScript the script name]]> +
    +
    + + + + + + + + + The debug script can aid debugging of failed reduce tasks. The script + is given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

The debug command, run on the node where the reduce failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

The script file is distributed through {@link DistributedCache} + APIs. The script file needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setReduceDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param rDbgScript the script name]]> +
    +
    + + + + + + + + null if it hasn't + been set. + @see #setJobEndNotificationURI(String)]]> + + + + + + The uri can contain 2 special parameters: $jobId and + $jobStatus. Those, if present, are replaced by the job's + identifier and completion-status respectively.

    + +

    This is typically used by application-writers to implement chaining of + Map-Reduce jobs in an asynchronous manner.

    + + @param uri the job end notification uri + @see JobStatus + @see Job Completion and Chaining]]> +
    +
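For instance, a hypothetical notification endpoint could be registered as below; the URL is illustrative only, and the framework substitutes the two parameters when the job finishes:

+     // $jobId and $jobStatus are replaced by the framework at job completion.
+     job.setJobEndNotificationURI(
+         "http://example.com/notify?id=$jobId&status=$jobStatus");   // illustrative URL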
    + + + + When a job starts, a shared directory is created at location + + ${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ . + This directory is exposed to the users through + job.local.dir . + So, the tasks can use this space + as scratch space and share files among them.

+ This value is also available as a System property. + + @return The localized job specific shared directory]]> +
    +
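A minimal sketch of picking the directory up from inside a task, assuming job is the task's JobConf and that the same job.local.dir name is used for the System property:

+     // Both lookups are expected to return the same localized, job-specific scratch directory.
+     String jobLocalDir = job.get("job.local.dir");
+     String viaProperty = System.getProperty("job.local.dir");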
    + + + + + + + + + + + + + + + + + + JobConf is the primary interface for a user to describe a + map-reduce job to the Hadoop framework for execution. The framework tries to + faithfully execute the job as-is described by JobConf, however: +
      +
    1. + Some configuration parameters might have been marked as + + final by administrators and hence cannot be altered. +
    2. +
3. + While some job parameters are straight-forward to set + (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly with the + rest of the framework and/or job-configuration and are relatively more + complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}). +
    4. +

    + +

    JobConf typically specifies the {@link Mapper}, combiner + (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and + {@link OutputFormat} implementations to be used etc. + +

Optionally JobConf is used to specify other advanced facets + of the job such as Comparators to be used, files to be put in + the {@link DistributedCache}, whether or not intermediate and/or job outputs + are to be compressed (and how), debuggability via user-provided scripts + ( {@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}), + for doing post-processing on task logs, task's stdout, stderr, syslog, etc.

    + +

    Here is an example on how to configure a job via JobConf:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     FileInputFormat.setInputPaths(job, new Path("in"));
    +     FileOutputFormat.setOutputPath(job, new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setCombinerClass(MyJob.MyReducer.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +     
    +     job.setInputFormat(SequenceFileInputFormat.class);
    +     job.setOutputFormat(SequenceFileOutputFormat.class);
    + 

    + + @see JobClient + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + any job + run on the jobtracker started at 200707121733, we would use : +
     
+ JobID.getJobIDsPattern("200707121733", null);
    + 
    + which will return : +
     "job_200707121733_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @return a regex pattern matching JobIDs]]> +
    +
    + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID + @see JobTracker#getNewJobId() + @see JobTracker#getStartTime()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "N/A" + + @return Scheduling information associated to particular Job Queue]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -archives + -files inputjar args]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + zero. + + @param conf configuration for the JobTracker. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Output pairs need not be of the same types as input pairs. A given + input pair may map to zero or many output pairs. Output pairs are + collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the input key. + @param value the input value. + @param output collects mapped keys and values. + @param reporter facility to report progress.]]> +
+ + + Maps are the individual tasks which transform input records into + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link JobConf} for the + job via the {@link JobConfigurable#configure(JobConf)} and initialize + themselves. Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    The framework then calls + {@link #map(Object, Object, OutputCollector, Reporter)} + for each key/value pair in the InputSplit for that task.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the grouping by specifying + a Comparator via + {@link JobConf#setOutputKeyComparatorClass(Class)}.

    + +

    The grouped Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    The intermediate, grouped outputs are always stored in + {@link SequenceFile}s. Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the JobConf.

    + +

    If the job has + zero + reduces then the output of the Mapper is directly written + to the {@link FileSystem} without grouping by keys.

    + +

    Example:

    +

    +     public class MyMapper<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Mapper<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +       
    +       private String mapTaskId;
    +       private String inputFile;
    +       private int noRecords = 0;
    +       
    +       public void configure(JobConf job) {
    +         mapTaskId = job.get("mapred.task.id");
    +         inputFile = job.get("mapred.input.file");
    +       }
    +       
    +       public void map(K key, V val,
    +                       OutputCollector<K, V> output, Reporter reporter)
    +       throws IOException {
    +         // Process the <key, value> pair (assume this takes a while)
    +         // ...
    +         // ...
    +         
    +         // Let the framework know that we are alive, and kicking!
    +         // reporter.progress();
    +         
    +         // Process some more
    +         // ...
    +         // ...
    +         
    +         // Increment the no. of <key, value> pairs processed
    +         ++noRecords;
    +
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +        
    +         // Every 100 records update application-level status
    +         if ((noRecords%100) == 0) {
    +           reporter.setStatus(mapTaskId + " processed " + noRecords + 
    +                              " from input-file: " + inputFile); 
    +         }
    +         
    +         // Output the result
    +         output.collect(key, val);
    +       }
    +     }
    + 

    + +

    Applications may write a custom {@link MapRunnable} to exert greater + control on map processing e.g. multi-threaded Mappers etc.

    + + @see JobConf + @see InputFormat + @see Partitioner + @see Reducer + @see MapReduceBase + @see MapRunnable + @see SequenceFile]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + Provides default no-op implementations for a few methods, most non-trivial + applications need to override some of them.

    ]]> +
    +
    + + + + + + + + + + + <key, value> pairs. + +

    Mapping of input records to output records is complete when this method + returns.

+ + @param input the {@link RecordReader} to read the input records. + @param output the {@link OutputCollector} to collect the output records. + @param reporter {@link Reporter} to report progress, status-updates etc. + @throws IOException]]> +
    +
    + + Custom implementations of MapRunnable can exert greater + control on map processing e.g. multi-threaded, asynchronous mappers etc.

    + + @see Mapper]]> +
    +
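A minimal sketch of a custom run loop, essentially what a plain single-threaded runner would do; mapper is assumed to have been instantiated in configure(), and K1/V1/K2/V2 are the usual type parameters:

+     public void run(RecordReader<K1, V1> input, OutputCollector<K2, V2> output,
+                     Reporter reporter) throws IOException {
+       K1 key = input.createKey();
+       V1 value = input.createValue();
+       // Feed every record of the split to the mapper; a multi-threaded
+       // implementation could hand records off to a worker pool here instead.
+       while (input.next(key, value)) {
+         mapper.map(key, value, output, reporter);
+       }
+     }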
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nearly + equal content length.
    + Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} + to construct RecordReader's for MultiFileSplit's. + @see MultiFileSplit]]> +
    +
    + + + + + + + + + + + + + + + + + th Path]]> + + + + + + + + + + + th Path]]> + + + + + + + + + + + + + + + + + + + + + + + MultiFileSplit can be used to implement {@link RecordReader}'s, with + reading one record per file. + @see FileSplit + @see MultiFileInputFormat]]> + + + + + + + + + + + + + + + <key, value> pairs output by {@link Mapper}s + and {@link Reducer}s. + +

    OutputCollector is the generalization of the facility + provided by the Map-Reduce framework to collect data output by either the + Mapper or the Reducer i.e. intermediate outputs + or the output of the job.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OutputCommitter describes the commit of task output for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputCommitter of + the job to:

    +

      +
    1. + Setup the job during initialization. For example, create the temporary + output directory for the job during the initialization of the job. +
    2. +
    3. + Cleanup the job after the job completion. For example, remove the + temporary output directory after the job completion. +
    4. +
    5. + Setup the task temporary output. +
    6. +
    7. + Check whether a task needs a commit. This is to avoid the commit + procedure if a task does not need commit. +
    8. +
    9. + Commit of the task output. +
    10. +
    11. + Discard the task commit. +
    12. +
    + + @see FileOutputCommitter + @see JobContext + @see TaskAttemptContext]]> +
    +
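A minimal, do-nothing committer sketch covering the hooks listed above (old mapred API); a real implementation such as FileOutputCommitter manages the temporary output directories inside these methods:

+     public class NoOpOutputCommitter extends OutputCommitter {
+       public void setupJob(JobContext context) throws IOException { }      // job initialization
+       public void cleanupJob(JobContext context) throws IOException { }    // job cleanup
+       public void setupTask(TaskAttemptContext context) throws IOException { }
+       public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
+         return false;                // nothing to commit, so the commit step is skipped
+       }
+       public void commitTask(TaskAttemptContext context) throws IOException { }
+       public void abortTask(TaskAttemptContext context) throws IOException { }
+     }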
+ + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when the + job is submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + @param ignored + @param job job configuration. + @throws IOException when output should not be attempted]]> +
    +
    + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
1. + Validate the output-specification of the job. For example, check that the + output directory doesn't already exist. +
    2. + Provide the {@link RecordWriter} implementation to be used to write out + the output files of the job. Output files are stored in a + {@link FileSystem}. +
    3. +
    + + @see RecordWriter + @see JobConf]]> +
    +
Typically a hash function on all or a subset of the key.

+ + @param key the key to be partitioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
+ + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent to for reduction.

    + + @see Reducer]]> +
    +
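A minimal custom Partitioner sketch along the lines of a plain hash partitioner; the Text key/value types are only an example:

+     public class MyHashPartitioner implements Partitioner<Text, Text> {
+       public void configure(JobConf job) { }
+       public int getPartition(Text key, Text value, int numPartitions) {
+         // Non-negative hash of the key, spread evenly over the reduce tasks.
+         return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
+       }
+     }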
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.0 to 1.0. + @throws IOException]]> + + + + RecordReader reads <key, value> pairs from an + {@link InputSplit}. + +

    RecordReader, typically, converts the byte-oriented view of + the input, provided by the InputSplit, and presents a + record-oriented view for the {@link Mapper} & {@link Reducer} tasks for + processing. It thus assumes the responsibility of processing record + boundaries and presenting the tasks with keys and values.

    + + @see InputSplit + @see InputFormat]]> +
    +
    + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param reporter facility to report progress. + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + Reduces values for a given key. + +

The framework calls this method for each + <key, (list of values)> pair in the grouped inputs. + Output values must be of the same type as input values. Input keys must + not be altered. The framework will reuse the key and value objects + that are passed into the reduce; therefore the application should clone + the objects it wants to keep a copy of. In many cases, all values are + combined into zero or one value. +

    + +

    Output pairs are collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the key. + @param values the list of values to reduce. + @param output to collect keys and combined values. + @param reporter facility to report progress.]]> +
    + + + The number of Reducers for the job is set by the user via + {@link JobConf#setNumReduceTasks(int)}. Reducer implementations + can access the {@link JobConf} for the job via the + {@link JobConfigurable#configure(JobConf)} method and initialize themselves. + Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    Reducer has 3 primary phases:

    +
      +
    1. + +

      Shuffle

      + +

Reducer is input the grouped output of a {@link Mapper}. + In this phase the framework, for each Reducer, fetches the + relevant partition of the output of all the Mappers, via HTTP. +

      +
    2. + +
    3. +

      Sort

      + +

      The framework groups Reducer inputs by keys + (since different Mappers may have output the same key) in this + stage.

      + +

      The shuffle and sort phases occur simultaneously i.e. while outputs are + being fetched they are merged.

      + +
      SecondarySort
      + +

If equivalence rules for keys while grouping the intermediates are + different from those for grouping keys before reduction, then one may + specify a Comparator via + {@link JobConf#setOutputValueGroupingComparator(Class)}. Since + {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to + control how intermediate keys are sorted, these can be used in conjunction + to simulate secondary sort on values.

      + + + For example, say that you want to find duplicate web pages and tag them + all with the url of the "best" known example. You would set up the job + like: +
        +
      • Map Input Key: url
      • +
      • Map Input Value: document
      • +
      • Map Output Key: document checksum, url pagerank
      • +
      • Map Output Value: url
      • +
      • Partitioner: by checksum
      • +
      • OutputKeyComparator: by checksum and then decreasing pagerank
      • +
      • OutputValueGroupingComparator: by checksum
      • +
      +
    4. + +
    5. +

      Reduce

      + +

      In this phase the + {@link #reduce(Object, Iterator, OutputCollector, Reporter)} + method is called for each <key, (list of values)> pair in + the grouped inputs.

      +

      The output of the reduce task is typically written to the + {@link FileSystem} via + {@link OutputCollector#collect(Object, Object)}.

      +
    6. +
    + +

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

    +     public class MyReducer<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Reducer<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +        
    +       private String reduceTaskId;
    +       private int noKeys = 0;
    +       
    +       public void configure(JobConf job) {
    +         reduceTaskId = job.get("mapred.task.id");
    +       }
    +       
    +       public void reduce(K key, Iterator<V> values,
    +                          OutputCollector<K, V> output, 
    +                          Reporter reporter)
    +       throws IOException {
    +       
    +         // Process
    +         int noValues = 0;
    +         while (values.hasNext()) {
    +           V value = values.next();
    +           
    +           // Increment the no. of values for this key
    +           ++noValues;
    +           
    +           // Process the <key, value> pair (assume this takes a while)
    +           // ...
    +           // ...
    +           
    +           // Let the framework know that we are alive, and kicking!
    +           if ((noValues%10) == 0) {
    +             reporter.progress();
    +           }
    +         
    +           // Process some more
    +           // ...
    +           // ...
    +           
    +           // Output the <key, value> 
    +           output.collect(key, value);
    +         }
    +         
    +         // Increment the no. of <key, list of values> pairs processed
    +         ++noKeys;
    +         
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +         
    +         // Every 100 keys update application-level status
    +         if ((noKeys%100) == 0) {
    +           reporter.setStatus(reduceTaskId + " processed " + noKeys);
    +         }
    +       }
    +     }
    + 

    + + @see Mapper + @see Partitioner + @see Reporter + @see MapReduceBase]]> +
    +
    + + + + + + + + + + + + + + + Counter of the given group/name.]]> + + + + + + + Enum. + @param amount A non-negative amount by which the counter is to + be incremented.]]> + + + + + + + + + + + + + + InputSplit that the map is reading from. + @throws UnsupportedOperationException if called outside a mapper]]> + + + + + + + + + {@link Mapper} and {@link Reducer} can use the Reporter + provided to report progress or just indicate that they are alive. In + scenarios where the application takes an insignificant amount of time to + process individual key/value pairs, this is crucial since the framework + might assume that the task has timed-out and kill that task. + +

    Applications can also update {@link Counters} via the provided + Reporter .

    + + @see Progressable + @see Counters]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + progress of the job's cleanup-tasks, as a float between 0.0 + and 1.0. When all cleanup tasks have completed, the function returns 1.0. + + @return the progress of the job's cleanup-tasks. + @throws IOException]]> + + + + + + progress of the job's setup-tasks, as a float between 0.0 + and 1.0. When all setup tasks have completed, the function returns 1.0. + + @return the progress of the job's setup-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RunningJob is the user-interface to query for details on a + running Map-Reduce job. + +

    Clients can get hold of RunningJob via the {@link JobClient} + and then query the running-job for details such as name, configuration, + progress etc.

    + + @see JobClient]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This allows the user to specify the key class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + This allows the user to specify the value class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
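For example, the declared key/value classes could be recorded independently of the raw BytesWritable data actually written; the classes chosen below are illustrative:

+     // Record the logical key/value types in the SequenceFile metadata.
+     SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(conf, IntWritable.class);
+     SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(conf, Text.class);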
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f. The filtering criteria is + MD5(key) % f == 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + f using + the criteria record# % f == 0. + For example, if the frequency is 10, one out of 10 records is returned.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}. + false otherwise.]]> + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}. + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop provides an optional mode of execution in which the bad records + are detected and skipped in further attempts. + +

This feature can be used when map/reduce tasks crash deterministically on + certain input. This happens due to bugs in the map/reduce function. The usual + course would be to fix these bugs. But sometimes this is not possible; + perhaps the bug is in third party libraries for which the source code is + not available. Due to this, the task never reaches completion even with + multiple attempts, and the complete data for that task is lost.

    + +

With this feature, only a small portion of data is lost surrounding + the bad record, which may be acceptable for some user applications; + see {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}

    + +

The skipping mode gets kicked off after a certain number of failures; + see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}

    + +

In the skipping mode, the map/reduce task maintains the record range which + is getting processed at all times. Before giving the input to the + map/reduce function, it sends this record range to the Task tracker. + If the task crashes, the Task tracker knows which one was the last reported + range. On further attempts that range gets skipped.

    ]]> +
    +
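A hedged example of switching the feature on from the job driver; the thresholds below are illustrative only:

+     // Start skipping after two failed attempts; tolerate losing at most one
+     // record around a bad map record and one group around a bad reduce key.
+     SkipBadRecords.setAttemptsToStartSkipping(conf, 2);
+     SkipBadRecords.setMapperMaxSkipRecords(conf, 1L);
+     SkipBadRecords.setReducerMaxSkipGroups(conf, 1L);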
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all task attempt IDs + of any jobtracker, in any job, of the first + map task, we would use : +
     
    + TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
    + 
    + which will return : +
     "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @param attemptId the task attempt number, or null + @return a regex pattern matching TaskAttemptIDs]]> +
    +
    + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the first map task + of any jobtracker, of any job, we would use : +

     
    + TaskID.getTaskIDsPattern(null, null, true, 1);
    + 
    + which will return : +
     "task_[^_]*_[0-9]*_m_000001*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @return a regex pattern matching TaskIDs]]> +
    + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.log.dir.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the Job was added.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([,]*) + func ::= tbl(,"") + class ::= @see java.lang.Class#forName(java.lang.String) + path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) + } + Reads expression from the mapred.join.expr property and + user-supplied join types from mapred.join.define.<ident> + types. Paths supplied to tbl are given as input paths to the + InputFormat class listed. + @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,

    ) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + mapred.join.define.<ident> to a classname. In the expression + mapred.join.expr, the identifier will be assumed to be a + ComposableRecordReader. + mapred.join.keycomparator can be a classname used to compare keys + in the join. + @see JoinRecordReader + @see MultiFilterRecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + capacity children to position + id in the parent reader. + The id of a root CompositeRecordReader is -1 by convention, but relying + on this is not recommended.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + override(S1,S2,S3) will prefer values + from S3 over S2, and values from S2 over S1 for all keys + emitted from all sources.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [,,...,]]]> + + + + + + + out. + TupleWritable format: + {@code + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

+ For the added Mapper the configuration given for it, + mapperConf, has precedence over the job's JobConf. This + precedence is in effect when the task is running. +

+ IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper; this is done by the addMapper for the last mapper in the chain +

    + + @param job job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overriden super.configure(...) should be + invoked at the beginning of the overwriter method.]]> + + + + + + + + + + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overriden super.close() should be + invoked at the end of the overwriter method.]]> + + + + + The Mapper classes are invoked in a chained (or piped) fashion, the output of + the first becomes the input of the second, and so on until the last Mapper, + the output of the last Mapper will be written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed in a chain. This enables having + reusable specialized Mappers that can be combined to perform composite + operations within a single task. +

+ Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reducer in the chain use matching output and input key and + value classes as no conversion is done by the chaining code. +

+ Using the ChainMapper and the ChainReducer classes it is possible to compose + Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An + immediate benefit of this pattern is a dramatic reduction in disk IO. +

+ IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper; this is done by the addMapper for the last mapper in the chain. +

    + ChainMapper usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    + 

    + JobConf mapAConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, + Text.class, Text.class, true, mapAConf); +

    + JobConf mapBConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, mapBConf); +

    + JobConf reduceConf = new JobConf(false); + ... + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, + Text.class, Text.class, true, reduceConf); +

    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, null); +

    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, + LongWritable.class, LongWritable.class, true, null); +

    + FileInputFormat.setInputPaths(conf, inDir); + FileOutputFormat.setOutputPath(conf, outDir); + ... +

    + JobClient jc = new JobClient(conf); + RunningJob job = jc.submitJob(conf); + ... +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Reducer leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Reducer does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

+ For the added Reducer the configuration given for it, + reducerConf, has precedence over the job's JobConf. This + precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. + + @param job job's JobConf to add the Reducer class. + @param klass the Reducer class to add. + @param inputKeyClass reducer input key class. + @param inputValueClass reducer input value class. + @param outputKeyClass reducer output key class. + @param outputValueClass reducer output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param reducerConf a JobConf with the configuration for the Reducer + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

+ For the added Mapper the configuration given for it, + mapperConf, has precedence over the job's JobConf. This + precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain + . + + @param job chain job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overriden super.configure(...) should be + invoked at the beginning of the overwriter method.]]> + + + + + + + + + + reduce(...) method of the Reducer with the + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overriden super.close() should be + invoked at the end of the overwriter method.]]> + + + + + For each record output by the Reducer, the Mapper classes are invoked in a + chained (or piped) fashion, the output of the first becomes the input of the + second, and so on until the last Mapper, the output of the last Mapper will + be written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed after the Reducer or in a chain. + This enables having reusable specialized Mappers that can be combined to + perform composite operations within a single task. +

+ Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reducer in the chain use matching output and input key and + value classes as no conversion is done by the chaining code. +

+ Using the ChainMapper and the ChainReducer classes it is possible to compose + Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An + immediate benefit of this pattern is a dramatic reduction in disk IO. +

+ IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer; this is done by the setReducer or the addMapper for the last + element in the chain. +

    + ChainReducer usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    + 

    + JobConf mapAConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, + Text.class, Text.class, true, mapAConf); +

    + JobConf mapBConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, mapBConf); +

    + JobConf reduceConf = new JobConf(false); + ... + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, + Text.class, Text.class, true, reduceConf); +

    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, null); +

    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, + LongWritable.class, LongWritable.class, true, null); +

    + FileInputFormat.setInputPaths(conf, inDir); + FileOutputFormat.setOutputPath(conf, outDir); + ... +

    + JobClient jc = new JobClient(conf); + RunningJob job = jc.submitJob(conf); + ... +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + @param freq The frequency with which records will be emitted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + This will read every split at the client, which is very expensive. + @param freq Probability with which a key will be chosen. + @param numSamples Total number of samples to obtain from all selected + splits.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + Takes the first numSamples / numSplits records from each split. + @param numSamples Total number of samples to obtain from all selected + splits.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the name output is multi, false + if it is single. If the name output is not defined it returns + false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @param conf job conf to add the named output + @param namedOutput named output name, it has to be a word, letters + and numbers only, cannot be the word 'part' as + that is reserved for the + default output. + @param outputFormatClass OutputFormat class. + @param keyClass key class + @param valueClass value class]]> + + + + + + + + + + + + @param conf job conf to add the named output + @param namedOutput named output name, it has to be a word, letters + and numbers only, cannot be the word 'part' as + that is reserved for the + default output. + @param outputFormatClass OutputFormat class. + @param keyClass key class + @param valueClass value class]]> + + + + + + + + By default these counters are disabled. +

+ MultipleOutputs supports counters; by default they are disabled. + The counters group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_' and the multiname. + + @param conf job conf in which to enable or disable the counters. + @param enabled indicates if the counters will be enabled or not.]]> +
    +
    + + + + + By default these counters are disabled. +

+ MultipleOutputs supports counters; by default they are disabled. + The counters group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_' and the multiname. + + + @param conf job conf to check whether the counters are enabled. + @return TRUE if the counters are enabled, FALSE if they are disabled.]]> +
    +
    + + + + + + + + + + + + + @param namedOutput the named output name + @param reporter the reporter + @return the output collector for the given named output + @throws IOException thrown if output collector could not be created]]> + + + + + + + + + + + @param namedOutput the named output name + @param multiName the multi name part + @param reporter the reporter + @return the output collector for the given named output + @throws IOException thrown if output collector could not be created]]> + + + + + + + If overriden subclasses must invoke super.close() at the + end of their close() + + @throws java.io.IOException thrown if any of the MultipleOutput files + could not be closed properly.]]> + + + + OutputCollector passed to + the map() and reduce() methods of the + Mapper and Reducer implementations. +

    + Each additional output, or named output, may be configured with its own + OutputFormat, with its own key class and with its own value + class. +

+ A named output can be a single file or a multi file. The latter is referred to + as a multi named output. +

+ A multi named output is an unbounded set of files all sharing the same + OutputFormat, key class and value class configuration. +

+ When named outputs are used within a Mapper implementation, + key/values written to a named output are not part of the reduce phase; only + key/values written to the job OutputCollector are part of the + reduce phase. +

+ MultipleOutputs supports counters; by default they are disabled. The counters + group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_' and the multiname. +
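+ A minimal sketch of toggling and checking these counters (an illustrative
+ fragment, assuming a JobConf named conf such as the one in the
+ configuration example below):
+
+ // Enable the per-named-output counters described above.
+ MultipleOutputs.setCountersEnabled(conf, true);
+ // Later, query whether they are on for this job.
+ boolean countersOn = MultipleOutputs.getCountersEnabled(conf);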

    + Job configuration usage pattern is: +

    +
    + JobConf conf = new JobConf();
    +
    + conf.setInputPath(inDir);
    + FileOutputFormat.setOutputPath(conf, outDir);
    +
    + conf.setMapperClass(MOMap.class);
    + conf.setReducerClass(MOReduce.class);
    + ...
    +
    + // Defines additional single text based output 'text' for the job
    + MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
    + LongWritable.class, Text.class);
    +
    + // Defines additional multi sequencefile based output 'sequence' for the
    + // job
    + MultipleOutputs.addMultiNamedOutput(conf, "seq",
    +   SequenceFileOutputFormat.class,
    +   LongWritable.class, Text.class);
    + ...
    +
    + JobClient jc = new JobClient();
    + RunningJob job = jc.submitJob(conf);
    +
    + ...
    + 
    +

+ Usage in a Reducer is: +

    +
    + public class MOReduce implements
    +   Reducer<WritableComparable, Writable> {
    + private MultipleOutputs mos;
    +
    + public void configure(JobConf conf) {
    + ...
    + mos = new MultipleOutputs(conf);
    + }
    +
    + public void reduce(WritableComparable key, Iterator<Writable> values,
    + OutputCollector output, Reporter reporter)
    + throws IOException {
    + ...
    + mos.getCollector("text", reporter).collect(key, new Text("Hello"));
    + mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye"));
    + mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau"));
    + ...
    + }
    +
    + public void close() throws IOException {
    + mos.close();
    + ...
    + }
    +
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used instead of the default implementation, + @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + bound in order to improve throughput. +

    + Map implementations using this MapRunnable must be thread-safe. +

+ The Map-Reduce job has to be configured to use this MapRunnable class (using + the JobConf.setMapRunnerClass method) and + the number of threads the thread-pool can use with the + mapred.map.multithreadedrunner.threads property; its default + value is 10 threads. +
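+ For illustration, a minimal configuration sketch of the above (MyMapper is a
+ placeholder mapper class and the thread count is arbitrary):
+
+ JobConf conf = new JobConf();
+ conf.setMapperClass(MyMapper.class);                        // placeholder; must be thread-safe
+ conf.setMapRunnerClass(MultithreadedMapRunner.class);       // replace the default MapRunner
+ conf.setInt("mapred.map.multithreadedrunner.threads", 20);  // default is 10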

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + pairs. Uses + {@link StringTokenizer} to break text into tokens.]]> + + + + + + + + + + + + total.order.partitioner.natural.order is not false, a trie + of the first total.order.partitioner.max.trie.depth(2) + 1 bytes + will be built. Otherwise, keys will be located using a binary search of + the partition keyset using the {@link org.apache.hadoop.io.RawComparator} + defined for this job. The input file must be sorted with the same + comparator and contain {@link + org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys.]]> + + + + + + + + + + + + R reduces, there are R-1 + keys in the SequenceFile.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + generateKeyValPairs(Object key, Object value); public void + configure(JobConfjob); } + + The package also provides a base class, ValueAggregatorBaseDescriptor, + implementing the above interface. The user can extend the base class and + implement generateKeyValPairs accordingly. + + The primary work of generateKeyValPairs is to emit one or more key/value + pairs based on the input key/value pair. The key in an output key/value pair + encode two pieces of information: aggregation type and aggregation id. The + value will be aggregated onto the aggregation id according the aggregation + type. + + This class offers a function to generate a map/reduce job using Aggregate + framework. The function takes the following parameters: input directory spec + input format (text or sequence file) output directory a file specifying the + user plugin class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The job can be configured using the static methods in this class, + {@link DBInputFormat}, and {@link DBOutputFormat}. +

    + Alternatively, the properties can be set in the configuration with proper + values. + + @see DBConfiguration#configureDB(JobConf, String, String, String, String) + @see DBInputFormat#setInput(JobConf, Class, String, String) + @see DBInputFormat#setInput(JobConf, Class, String, String, String, String...) + @see DBOutputFormat#setOutput(JobConf, String, String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 20070101 AND length > 0)' + @param orderBy the fieldNames in the orderBy clause. + @param fieldNames The field names in the table + @see #setInput(JobConf, Class, String, String)]]> + + + + + + + + + + + + + + DBInputFormat emits LongWritables containing the record number as + key and DBWritables as value. + + The SQL query, and input class can be using one of the two + setInput methods.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {@link DBOutputFormat} accepts <key,value> pairs, where + key has a type extending DBWritable. Returned {@link RecordWriter} + writes only the key to the database with a batch SQL query.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DBWritable. DBWritable, is similar to {@link Writable} + except that the {@link #write(PreparedStatement)} method takes a + {@link PreparedStatement}, and {@link #readFields(ResultSet)} + takes a {@link ResultSet}. +

    + Implementations are responsible for writing the fields of the object + to PreparedStatement, and reading the fields of the object from the + ResultSet. + +

    Example:

    + If we have the following table in the database : +
    + CREATE TABLE MyTable (
    +   counter        INTEGER NOT NULL,
    +   timestamp      BIGINT  NOT NULL,
    + );
    + 
    + then we can read/write the tuples from/to the table with : +

    + public class MyWritable implements Writable, DBWritable {
    +   // Some data     
    +   private int counter;
    +   private long timestamp;
    +       
    +   //Writable#write() implementation
    +   public void write(DataOutput out) throws IOException {
    +     out.writeInt(counter);
    +     out.writeLong(timestamp);
    +   }
    +       
    +   //Writable#readFields() implementation
    +   public void readFields(DataInput in) throws IOException {
    +     counter = in.readInt();
    +     timestamp = in.readLong();
    +   }
    +       
    +   public void write(PreparedStatement statement) throws SQLException {
    +     statement.setInt(1, counter);
    +     statement.setLong(2, timestamp);
    +   }
    +       
    +   public void readFields(ResultSet resultSet) throws SQLException {
    +     counter = resultSet.getInt(1);
    +     timestamp = resultSet.getLong(2);
    +   } 
    + }
    + 

    ]]> +
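+ A minimal sketch of wiring the MyWritable class above into a job through
+ DBConfiguration, DBInputFormat and DBOutputFormat (the JDBC driver, URL and
+ credentials are placeholders):
+
+ JobConf job = new JobConf();
+ DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver",
+     "jdbc:mysql://localhost/mydb", "user", "password");      // placeholder connection
+ DBInputFormat.setInput(job, MyWritable.class, "MyTable",
+     null /* conditions */, "counter" /* orderBy */, "counter", "timestamp");
+ DBOutputFormat.setOutput(job, "MyTable", "counter", "timestamp");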
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When constructing the instance, if the factory property + contextName.class exists, + its value is taken to be the name of the class to instantiate. Otherwise, + the default is to create an instance of + org.apache.hadoop.metrics.spi.NullContext, which is a + dummy "no-op" context which will cause all metric data to be discarded. + + @param contextName the name of the context + @return the named MetricsContext]]> + + + + + + + + + + + + + + When the instance is constructed, this method checks if the file + hadoop-metrics.properties exists on the class path. If it + exists, it must be in the format defined by java.util.Properties, and all + the properties in the file are set as attributes on the newly created + ContextFactory instance. + + @return the singleton ContextFactory instance]]> + + + + getFactory() method.]]> + + + + + + + + + + + + + + + + + + + startMonitoring() again after calling + this. + @see #close()]]> + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A record name identifies the kind of data to be reported. For example, a + program reporting statistics relating to the disks on a computer might use + a record name "diskStats".

    + + A record has zero or more tags. A tag has a name and a value. To + continue the example, the "diskStats" record might use a tag named + "diskName" to identify a particular disk. Sometimes it is useful to have + more than one tag, so there might also be a "diskType" with value "ide" or + "scsi" or whatever.

    + + A record also has zero or more metrics. These are the named + values that are to be reported to the metrics system. In the "diskStats" + example, possible metric names would be "diskPercentFull", "diskPercentBusy", + "kbReadPerSecond", etc.

    + + The general procedure for using a MetricsRecord is to fill in its tag and + metric values, and then call update() to pass the record to the + client library. + Metric data is not immediately sent to the metrics system + each time that update() is called. + An internal table is maintained, identified by the record name. This + table has columns + corresponding to the tag and the metric names, and rows + corresponding to each unique set of tag values. An update + either modifies an existing row in the table, or adds a new row with a set of + tag values that are different from all the other rows. Note that if there + are no tags, then there can be at most one row in the table.
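+ A minimal sketch of that procedure, reusing the "diskStats" example via the
+ MetricsUtil helper (the tag and metric values are illustrative):
+
+ MetricsContext context = MetricsUtil.getContext("diskStats");
+ MetricsRecord record = MetricsUtil.createRecord(context, "diskStats");
+ record.setTag("diskName", "sda");           // illustrative tag value
+ record.setMetric("diskPercentFull", 72);    // illustrative metric value
+ record.update();                            // hand the row to the client library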

    + + Once a row is added to the table, its data will be sent to the metrics system + on every timer period, whether or not it has been updated since the previous + timer period. If this is inappropriate, for example if metrics were being + reported by some transient object in an application, the remove() + method can be used to remove the row and thus stop the data from being + sent.

    + + Note that the update() method is atomic. This means that it is + safe for different threads to be updating the same metric. More precisely, + it is OK for different threads to call update() on MetricsRecord instances + with the same set of tag names and tag values. Different threads should + not use the same MetricsRecord instance at the same time.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MetricsContext.registerUpdater().]]> + + + + + + + + + + + + + + + + + + + + + + + + + fileName attribute, + if specified. Otherwise the data will be written to standard + output.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is configured by setting ContextFactory attributes which in turn + are usually configured through a properties file. All the attributes are + prefixed by the contextName. For example, the properties file might contain: +

    + myContextName.fileName=/tmp/metrics.log
    + myContextName.period=5
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + contextName.tableName. The returned map consists of + those attributes with the contextName and tableName stripped off.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class implements the internal table of metric data, and the timer + on which data is to be sent to the metrics system. Subclasses must + override the abstract emitRecord method in order to transmit + the data.

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + update + and remove().]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + ,name=" + Where the and are the supplied parameters + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.rpc.socket.factory.class.<ClassName>. When no + such parameter exists then fall back on the default socket factory as + configured by hadoop.rpc.socket.factory.class.default. If + this default socket factory is not configured, then fall back on the JVM + default socket factory. + + @param conf the configuration + @param clazz the class (usually a {@link VersionedProtocol}) + @return a socket factory]]> + + + + + + hadoop.rpc.socket.factory.default + + @param conf the configuration + @return the default socket factory as specified in the configuration or + the JVM default socket factory if the configuration does not + contain a default socket factory property.]]> + + + + + + + + + + + + + : + ://:/]]> + + + + + + + + : + ://:/]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + From documentation for {@link #getInputStream(Socket, long)}:
+ Returns InputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketInputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getInputStream()} is returned. In the latter + case, the timeout argument is ignored and the timeout set with + {@link Socket#setSoTimeout(int)} applies for reads.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see #getInputStream(Socket, long) + + @param socket + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + +
    + + From documentation for {@link #getOutputStream(Socket, long)} :
+ Returns OutputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketOutputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getOutputStream()} is returned. In the latter + case, the timeout argument is ignored and the write will wait until + data is available.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see #getOutputStream(Socket, long) + + @param socket + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
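+ A minimal sketch combining the two helpers above (the endpoint and timeouts
+ are placeholders):
+
+ Configuration conf = new Configuration();
+ Socket socket = NetUtils.getDefaultSocketFactory(conf).createSocket();
+ socket.connect(new InetSocketAddress("host", 9000), 10000);  // placeholder endpoint
+ InputStream in = NetUtils.getInputStream(socket, 60000);     // 60s read timeout
+ OutputStream out = NetUtils.getOutputStream(socket, 60000);  // 60s write timeout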
    + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + node + + @param node + a node + @return true if node is already in the tree; false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + scope + if scope starts with ~, choose one from the all nodes except for the + ones in scope; otherwise, choose one from scope + @param scope range of nodes from which a node will be choosen + @return the choosen node]]> + + + + + + + scope but not in excludedNodes + if scope starts with ~, return the number of nodes that are not + in scope and excludedNodes; + @param scope a path string that may start with ~ + @param excludedNodes a list of nodes + @return number of available nodes]]> + + + + + + + + + + + + reader + It linearly scans the array, if a local node is found, swap it with + the first element of the array. + If a local rack node is found, swap it with the first element following + the local node. + If neither local node or local rack node is found, put a random replica + location at postion 0. + It leaves the rest nodes untouched.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout in milliseconds. Must not be negative. + @throws IOException]]> +
    +
    + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + +
+ + Create a new output stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketOutputStream#SocketOutputStream(WritableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout in milliseconds. Must not be negative. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = getCount(). + @param newCapacity The new capacity in bytes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index idx = startVector(...); + while (!idx.done()) { + .... // read element of a vector + idx.incr(); + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This task takes the given record definition files and compiles them into + java or c++ + files. It is then up to the user to compile the generated files. + +

    The task requires the file or the nested fileset element to be + specified. Optional attributes are language (set the output + language, default is "java"), + destdir (name of the destination directory for generated java/c++ + code, default is ".") and failonerror (specifies error handling + behavior. default is true). +

    Usage

    +
    + <recordcc
    +       destdir="${basedir}/gensrc"
    +       language="java">
    +   <fileset include="**\/*.jr" />
    + </recordcc>
    + 
    ]]> +
    +
    + +
    + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi as a comma separated string in + conf as a property attr + + The String starts with the user name followed by the default group names, + and other group names. + + @param conf configuration + @param attr property name + @param ugi a UnixUserGroupInformation]]> + + + + + + + + conf + + The object is expected to store with the property name attr + as a comma separated string that starts + with the user name followed by group names. + If the property name is not defined, return null. + It's assumed that there is only one UGI per user. If this user already + has a UGI in the ugi map, return the ugi in the map. + Otherwise, construct a UGI from the configuration, store it in the + ugi map and return it. + + @param conf configuration + @param attr property name + @return a UnixUGI + @throws LoginException if the stored string is ill-formatted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This tool supports archiving and anaylzing (sort/grep) of log-files. + It takes as input + a) Input uri which will serve uris of the logs to be archived. + b) Output directory (not mandatory). + b) Directory on dfs to archive the logs. + c) The sort/grep patterns for analyzing the files and separator for boundaries. + Usage: + Logalyzer -archive -archiveDir -analysis -logs -grep -sort -separator +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + in]]> + + + + + + + out.]]> + + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GenericOptionsParser to parse only the generic Hadoop + arguments. + + The array of string arguments other than the generic arguments can be + obtained by {@link #getRemainingArgs()}. + + @param conf the Configuration to modify. + @param args command-line arguments.]]> + + + + + GenericOptionsParser to parse given options as well + as generic Hadoop options. + + The resulting CommandLine object can be obtained by + {@link #getCommandLine()}. + + @param conf the configuration to modify + @param options options built by the caller + @param args User-specified arguments]]> + + + + + Strings containing the un-parsed arguments + or empty array if commandLine was not defined.]]> + + + + + CommandLine object + to process the parsed arguments. + + Note: If the object is created with + {@link #GenericOptionsParser(Configuration, String[])}, then returned + object will only contain parsed generic options. + + @return CommandLine representing list of arguments + parsed against Options descriptor.]]> + + + + + + + + + + + + + + + + + GenericOptionsParser is a utility to parse command line + arguments generic to the Hadoop framework. + + GenericOptionsParser recognizes several standarad command + line arguments, enabling applications to easily specify a namenode, a + jobtracker, additional configuration resources etc. + +

    Generic Options

    + +

    The supported generic options are:

    +

    +     -conf <configuration file>     specify a configuration file
    +     -D <property=value>            use value for given property
    +     -fs <local|namenode:port>      specify a namenode
    +     -jt <local|jobtracker:port>    specify a job tracker
    +     -files <comma separated list of files>    specify comma separated
    +                            files to be copied to the map reduce cluster
    +     -libjars <comma separated list of jars>   specify comma separated
    +                            jar files to include in the classpath.
    +     -archives <comma separated list of archives>    specify comma
    +             separated archives to be unarchived on the compute machines.
    +
    + 

    + +

    The general command line syntax is:

    +

    + bin/hadoop command [genericOptions] [commandOptions]
    + 

    + +

    Generic command line arguments might modify + Configuration objects, given to constructors.

    + +

    The functionality is implemented using Commons CLI.

    + +

    Examples:

    +

    + $ bin/hadoop dfs -fs darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    + 
    + $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    +     
    + $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
    + list /data directory in dfs with conf specified in hadoop-site.xml
    +     
    + $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt local -submit job.xml
    + submit a job to local runner
    + 
    + $ bin/hadoop jar -libjars testlib.jar 
    + -archives test.tgz -files file.txt inputjar args
    + job submission with libjars, files and archives
    + 

    + + @see Tool + @see ToolRunner]]> +
    +
    + + + + + + + + + Class<T>) of the + argument of type T. + @param The type of the argument + @param t the object to get it class + @return Class<T>]]> + + + + + + + List<T> to a an array of + T[]. + @param c the Class object of the items in the list + @param list the list to convert]]> + + + + + + List<T> to a an array of + T[]. + @param list the list to convert + @throws ArrayIndexOutOfBoundsException if the list is empty. + Use {@link #toArray(Class, List)} if the list may be empty.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.file.buffer.size specified in the given + Configuration. + @param in input stream + @param conf configuration + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-hadoop is loaded, + else false]]> + + + + + + true if native hadoop libraries, if present, can be + used for this job; false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { pq.top().change(); pq.adjustTop(); } + instead of
    +  { o = pq.pop(); o.change(); pq.push(o); }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take an insignificant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occured and time-out the operation.

    ]]> +
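+ A minimal sketch of an operation driving that callback (the copy helper
+ itself is hypothetical):
+
+ void copyWithProgress(InputStream in, OutputStream out, Progressable progress)
+     throws IOException {
+   byte[] buf = new byte[4096];
+   int n;
+   while ((n = in.read(buf)) > 0) {
+     out.write(buf, 0, n);
+     progress.progress();   // report liveness so the framework does not time out
+   }
+ }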
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop Pipes + or Hadoop Streaming. + + It also checks to ensure that we are running on a *nix platform else + (e.g. in Cygwin/Windows) it returns null. + @param conf configuration + @return a String[] with the ulimit command arguments or + null if we are running on a non *nix platform or + if the limit is unspecified.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell can be used to run unix commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShellCommandExecutorshould be used in cases where the output + of the command needs no explicit parsing and where the command, working + directory and the environment remains unchanged. The output of the command + is stored as-is and is expected to be small.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ArrayList of string values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the char to be escaped + @return an escaped string]]> + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the escaped char + @return an unescaped string]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyApp.MyMapper.class);
    +         job.setReducerClass(MyApp.MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
+         JobClient.runJob(job);
+         return 0;
+       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
+         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> +
    +
    + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    diff --git a/aarch64/share/hadoop/common/jdiff/hadoop_0.19.2.xml b/aarch64/share/hadoop/common/jdiff/hadoop_0.19.2.xml new file mode 100644 index 0000000..bbce108 --- /dev/null +++ b/aarch64/share/hadoop/common/jdiff/hadoop_0.19.2.xml @@ -0,0 +1,44204 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param in InputStream to deserialize the object from.]]> + + + + + + + + + + + name property, null if + no such property exists. + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + name property, without doing + variable expansion. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + + value of the name property. + + @param name property name. + @param value property value.]]> + + + + + + + name property. If no such property + exists, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, or if the specified value is not a valid + int, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as an int, + or defaultValue.]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property is specified, or if the specified value is not a valid + long, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property is specified, or if the specified value is not a valid + float, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a float, + or defaultValue.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

    + This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

      +
+ 1. hadoop-default.xml: Read-only defaults for hadoop.
+ 2. hadoop-site.xml: Site-specific configuration for a given hadoop installation.
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    +  <property>
    +    <name>dfs.client.buffer.dir</name>
    +    <value>/tmp/hadoop/dfs/client</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + hadoop-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
+ 1. Other properties defined in this Configuration; and, if a name is
+    undefined here,
+ 2. Properties in {@link System#getProperties()}.
    + +

    For example, if a configuration resource contains the following property + definitions: +

    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    + + When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name.]]> +
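+ A small sketch of that lookup (the resource name is a placeholder and is
+ assumed to define basedir and tempdir as above):
+
+ Configuration conf = new Configuration();
+ conf.addResource("my-site.xml");        // placeholder resource on the classpath
+ String tempdir = conf.get("tempdir");   // e.g. "/user/alice/tmp" after expansion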
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DistributedCache is a facility provided by the Map-Reduce + framework to cache files (text, archives, jars etc.) needed by applications. +

    + +

    Applications specify the files, via urls (hdfs:// or http://) to be cached + via the {@link org.apache.hadoop.mapred.JobConf}. + The DistributedCache assumes that the + files specified via hdfs:// urls are already present on the + {@link FileSystem} at the path specified by the url.

    + +

    The framework will copy the necessary files on to the slave node before + any tasks for the job are executed on that node. Its efficiency stems from + the fact that the files are only copied once per job and the ability to + cache archives which are un-archived on the slaves.

    + +

    DistributedCache can be used to distribute simple, read-only + data/text files and/or more complex types such as archives, jars etc. + Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. + Jars may be optionally added to the classpath of the tasks, a rudimentary + software distribution mechanism. Files have execution permissions. + Optionally users can also direct it to symlink the distributed cache file(s) + into the working directory of the task.

    + +

    DistributedCache tracks modification timestamps of the cache + files. Clearly the cache files should not be modified by the application + or externally while the job is executing.

    + +

    Here is an illustrative example on how to use the + DistributedCache:

    +

    +     // Setting up the cache for the application
    +     
    +     1. Copy the requisite files to the FileSystem:
    +     
    +     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
    +     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
    +     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
    +     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
    +     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
    +     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
    +     
    +     2. Setup the application's JobConf:
    +     
    +     JobConf job = new JobConf();
    +     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
    +                                   job);
+     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
+     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
    +     
    +     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
    +     or {@link org.apache.hadoop.mapred.Reducer}:
    +     
    +     public static class MapClass extends MapReduceBase  
    +     implements Mapper<K, V, K, V> {
    +     
    +       private Path[] localArchives;
    +       private Path[] localFiles;
    +       
    +       public void configure(JobConf job) {
    +         // Get the cached archives/files
    +         localArchives = DistributedCache.getLocalCacheArchives(job);
    +         localFiles = DistributedCache.getLocalCacheFiles(job);
    +       }
    +       
    +       public void map(K key, V value, 
    +                       OutputCollector<K, V> output, Reporter reporter) 
    +       throws IOException {
    +         // Use data from the cached archives/files here
    +         // ...
    +         // ...
    +         output.collect(k, v);
    +       }
    +     }
    +     
    + 

    + + @see org.apache.hadoop.mapred.JobConf + @see org.apache.hadoop.mapred.JobClient]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BufferedFSInputStream + with the specified buffer size, + and saves its argument, the input stream + in, for later use. An internal + buffer array of length size + is created and stored in buf. + + @param in the underlying input stream. + @param size the buffer size. + @exception IllegalArgumentException if size <= 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ' + @deprecated Consider using {@link GenericOptionsParser} instead.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    +
    +
    +

    +

+     ?              Matches any single character.
+     *              Matches zero or more characters.
+     [abc]          Matches a single character from character set {a,b,c}.
+     [a-b]          Matches a single character from the character range {a...b}. Note that character a must be lexicographically less than or equal to character b.
+     [^a]           Matches a single character that is not from character set or range {a}. Note that the ^ character must occur immediately to the right of the opening bracket.
+     \c             Removes (escapes) any special meaning of character c.
+     {ab,cd}        Matches a string from the string set {ab, cd}.
+     {ab,c{de,fh}}  Matches a string from the string set {ab, cde, cfh}.
    +
    +
+ + @param pathPattern a regular expression specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException]]> +
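As a small illustration of the pattern language above (the log paths are hypothetical), a sketch that expands a glob through FileSystem.globStatus, the call that applies these matching rules:
+     import org.apache.hadoop.conf.Configuration;
+     import org.apache.hadoop.fs.FileStatus;
+     import org.apache.hadoop.fs.FileSystem;
+     import org.apache.hadoop.fs.Path;
+
+     public class GlobExample {
+       public static void main(String[] args) throws Exception {
+         Configuration conf = new Configuration();
+         FileSystem fs = FileSystem.get(conf);
+         // Matches e.g. /logs/2014-02-01/part-00000 ... /logs/2014-02-28/part-00042
+         FileStatus[] matches = fs.globStatus(new Path("/logs/2014-02-*/part-[0-9]*"));
+         for (FileStatus status : matches) {
+           System.out.println(status.getPath());
+         }
+       }
+     }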
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object. The + Hadoop DFS is a multi-machine system that appears as a single + disk. It's useful because of its fault tolerance and potentially + very large capacity. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + buf at offset + and checksum into checksum. + The method is used for implementing read, therefore, it should be optimized + for sequential reading + @param pos chunkPos + @param buf desitination buffer + @param offset offset in buf at which to store data + @param len maximun number of bytes to read + @return number of bytes read]]> + + + + + + + + + + + + + + + + + -1 if the end of the + stream is reached. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + This method implements the general contract of the corresponding + {@link InputStream#read(byte[], int, int) read} method of + the {@link InputStream} class. As an additional + convenience, it attempts to read as many bytes as possible by repeatedly + invoking the read method of the underlying stream. This + iterated read continues until one of the following + conditions becomes true:

      + +
    • The specified number of bytes have been read, + +
    • The read method of the underlying stream returns + -1, indicating end-of-file. + +
    If the first read on the underlying stream returns + -1 to indicate end-of-file then this method returns + -1. Otherwise this method returns the number of bytes + actually read. + + @param b destination buffer. + @param off offset at which to start storing bytes. + @param len maximum number of bytes to read. + @return the number of bytes read, or -1 if the end of + the stream has been reached. + @exception IOException if an I/O error occurs. + ChecksumException if any checksum error occurs]]> +
    + + + + + + + + + + + + + + + + + + n bytes of data from the + input stream. + +

    This method may skip more bytes than are remaining in the backing + file. This produces no exception and the number of bytes skipped + may include some number of bytes that were beyond the EOF of the + backing file. Attempting to read from the stream after skipping past + the end will result in -1 indicating the end of the file. + +

    If n is negative, no bytes are skipped. + + @param n the number of bytes to be skipped. + @return the actual number of bytes skipped. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to skip to is corrupted]]> + + + + + + + This method may seek past the end of the file. + This produces no exception and an attempt to read from + the stream will result in -1 indicating the end of the file. + + @param pos the postion to seek to. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to seek to is corrupted]]> + + + + + + + + + + len bytes from + stm + + @param stm an input stream + @param buf destiniation buffer + @param offset offset at which to store data + @param len number of bytes to read + @return actual number of bytes read + @throws IOException if there is any IO error]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + len bytes from the specified byte array + starting at offset off and generate a checksum for + each data chunk. + +

    This method stores bytes from the given array into this + stream's buffer before it gets checksumed. The buffer gets checksumed + and flushed to the underlying output stream when all data + in a checksum chunk are in the buffer. If the buffer is empty and + requested length is at least as large as the size of next checksum chunk + size, this method will checksum and write the chunk directly + to the underlying output stream. Thus it avoids uneccessary data copy. + + @param b the data. + @param off the start offset in the data. + @param len the number of bytes to write. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trash feature. Files are moved to a user's trash + directory, a subdirectory of their home directory named ".Trash". Files are + initially moved to a current sub-directory of the trash directory. + Within that sub-directory their original path is preserved. Periodically + one may checkpoint the current trash and remove older checkpoints. (This + design permits trash management without enumeration of the full trash + content, without date support in the filesystem, and without clock + synchronization.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a tool for migrating data from an older to a newer version + of an S3 filesystem. +

    +

    + All files in the filesystem are migrated by re-writing the block metadata + - no datafiles are touched. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + Extracts AWS credentials from the filesystem URI or configuration. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A block-based {@link FileSystem} backed by + Amazon S3. +

    + @see NativeS3FileSystem]]> +
    +
    + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If f is a file, this method will make a single call to S3. + If f is a directory, this method will make a maximum of + (n / 1000) + 2 calls to S3, where n is the total number of + files and directories contained directly in f. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} for reading and writing files stored on + Amazon S3. + Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation + stores files on S3 in their + native form so they can be read by other S3 tools. +

    + @see org.apache.hadoop.fs.s3.S3FileSystem]]> +
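A hedged sketch of reaching an S3-backed FileSystem from client code; the bucket name is hypothetical, and the credential property names (fs.s3n.awsAccessKeyId / fs.s3n.awsSecretAccessKey) are assumed here rather than taken from this file:
+     import java.net.URI;
+     import org.apache.hadoop.conf.Configuration;
+     import org.apache.hadoop.fs.FileStatus;
+     import org.apache.hadoop.fs.FileSystem;
+     import org.apache.hadoop.fs.Path;
+
+     public class S3Example {
+       public static void main(String[] args) throws Exception {
+         Configuration conf = new Configuration();
+         // Credentials may also be embedded in the filesystem URI; property names assumed.
+         conf.set("fs.s3n.awsAccessKeyId", "ACCESS_KEY");
+         conf.set("fs.s3n.awsSecretAccessKey", "SECRET_KEY");
+         FileSystem s3 = FileSystem.get(URI.create("s3n://example-bucket/"), conf);
+         for (FileStatus f : s3.listStatus(new Path("/data"))) {
+           System.out.println(f.getPath());
+         }
+       }
+     }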
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . + @param name The name of the server + @param port The port to use on the server + @param findPort whether the server should start at the given port and + increment by 1 until it finds a free port. + @param conf Configuration]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + points to the log directory + "/static/" -> points to common static files (src/webapps/static) + "/" -> the jsp server code from (src/webapps/)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nth value.]]> + + + + + + + + + + + + + + + + + + + + + nth value in the file.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataInputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + DataInputBuffer buffer = new DataInputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using DataInput methods ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataOutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + DataOutputBuffer buffer = new DataOutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using DataOutput methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

    + Compared with ObjectWritable, this class is much more effective, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines + the classes that will be wrapped in GenericObject in the application. + Attention: the classes defined in getTypes() must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new InputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + InputBuffer buffer = new InputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using InputStream methods ...
    + }
    + 
    + @see DataInputBuffer + @see DataOutput]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link IOException} or + null pointers. Must only be used for cleanup in exception handlers. + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key and + val. Returns true if such a pair exists and false when at + the end of the map]]> + + + + + + + + + + + + + + + + key or if it does not exist, at the first entry + after the named key. + +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof.]]> + + + + + + + + + key does not exist, return + the first entry that falls just before the key. Otherwise, + return the record that sorts just after. + @return - the key that was the closest match or null if eof.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new OutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + OutputBuffer buffer = new OutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using OutputStream methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    + @see DataOutputBuffer + @see InputBuffer]]> +
    +
    + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link Writer}, {@link Reader} and + {@link Sorter} classes for writing, reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
    1. + Writer : Uncompressed records. +
    2. +
    3. + RecordCompressWriter : Record-compressed files, only compress + values. +
    4. +
    5. + BlockCompressWriter : Block-compressed files, both keys & + values are collected in 'blocks' + separately and compressed. The size of + the 'block' is configurable. +
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

The recommended way is to use the static createWriter methods + provided by SequenceFile to choose the preferred format.

    + +

    The {@link Reader} acts as the bridge and can read any of the above + SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
+     Common SequenceFile Header:
+       - version - 3 bytes of magic header SEQ, followed by 1 byte of actual version number (e.g. SEQ4 or SEQ6)
+       - keyClassName - key class
+       - valueClassName - value class
+       - compression - A boolean which specifies if compression is turned on for keys/values in this file.
+       - blockCompression - A boolean which specifies if block-compression is turned on for keys/values in this file.
+       - compression codec - CompressionCodec class which is used for compression of keys and/or values (if compression is enabled).
+       - metadata - {@link Metadata} for this file.
+       - sync - A sync marker to denote end of the header.
+
+     Uncompressed SequenceFile Format:
+       - Header
+       - Record
+           - Record length
+           - Key length
+           - Key
+           - Value
+       - A sync-marker every few 100 bytes or so.
+
+     Record-Compressed SequenceFile Format:
+       - Header
+       - Record
+           - Record length
+           - Key length
+           - Key
+           - Compressed Value
+       - A sync-marker every few 100 bytes or so.
+
+     Block-Compressed SequenceFile Format:
+       - Header
+       - Record Block
+           - Compressed key-lengths block-size
+           - Compressed key-lengths block
+           - Compressed keys block-size
+           - Compressed keys block
+           - Compressed value-lengths block-size
+           - Compressed value-lengths block
+           - Compressed values block-size
+           - Compressed values block
+       - A sync-marker every few 100 bytes or so.

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
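A small sketch (the output path is hypothetical) of the createWriter/Reader round trip described above, using the FileSystem-based factory methods that ship with this release:
+     import org.apache.hadoop.conf.Configuration;
+     import org.apache.hadoop.fs.FileSystem;
+     import org.apache.hadoop.fs.Path;
+     import org.apache.hadoop.io.IntWritable;
+     import org.apache.hadoop.io.SequenceFile;
+     import org.apache.hadoop.io.Text;
+
+     public class SequenceFileExample {
+       public static void main(String[] args) throws Exception {
+         Configuration conf = new Configuration();
+         FileSystem fs = FileSystem.get(conf);
+         Path file = new Path("/tmp/example.seq");
+
+         SequenceFile.Writer writer =
+             SequenceFile.createWriter(fs, conf, file, Text.class, IntWritable.class);
+         try {
+           writer.append(new Text("apple"), new IntWritable(3));
+           writer.append(new Text("banana"), new IntWritable(7));
+         } finally {
+           writer.close();
+         }
+
+         SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
+         try {
+           Text key = new Text();
+           IntWritable value = new IntWritable();
+           while (reader.next(key, value)) {  // the Reader handles any of the three formats
+             System.out.println(key + "\t" + value);
+           }
+         } finally {
+           reader.close();
+         }
+       }
+     }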
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key, skipping its + value. True if another entry exists, and false at end of file.]]> + + + + + + + + key and + val. Returns true if such a pair exists and false when at + end of file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The position passed must be a position returned by {@link + SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + position, use {@link SequenceFile.Reader#sync(long)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SegmentDescriptor + @param segments the list of SegmentDescriptors + @param tmpDir the directory to write temporary files into + @return RawKeyValueIterator + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For best performance, applications should make sure that the {@link + Writable#readFields(DataInput)} implementation of their keys is + very efficient. In particular, it should avoid allocating memory.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This always returns a synchronized position. In other words, + immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + the key may be earlier in the file than key last written when this + method was called (e.g., with block-compression, it may be the first key + in the block that was being written when this method was called).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key. Returns + true if such a key exists and false when at the end of the set.]]> + + + + + + + key. + Returns key, or null if no match exists.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instatiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. 
+ @return byte position of the first occurence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a UTF8 with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + Also includes utilities for efficiently reading and writing UTF-8. + + @deprecated replaced by Text]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

    + + @param in DataInput to deseriablize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +

    +     public class MyWritable implements Writable {
    +       // Some data     
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 

    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Example:

    +

    +     public class MyWritableComparable implements WritableComparable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable w) {
+         int thisValue = this.counter;
+         int thatValue = w.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +     }
    + 

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implemenation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. 
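A sketch of the borrow/return cycle that the CodecPool methods above describe; the output path is hypothetical and GzipCodec is just an arbitrary codec choice:
+     import java.io.OutputStream;
+     import org.apache.hadoop.conf.Configuration;
+     import org.apache.hadoop.fs.FileSystem;
+     import org.apache.hadoop.fs.Path;
+     import org.apache.hadoop.io.compress.CodecPool;
+     import org.apache.hadoop.io.compress.CompressionCodec;
+     import org.apache.hadoop.io.compress.Compressor;
+     import org.apache.hadoop.io.compress.GzipCodec;
+     import org.apache.hadoop.util.ReflectionUtils;
+
+     public class CodecPoolExample {
+       public static void main(String[] args) throws Exception {
+         Configuration conf = new Configuration();
+         CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
+         Compressor compressor = CodecPool.getCompressor(codec);  // reuse a pooled instance
+         try {
+           FileSystem fs = FileSystem.get(conf);
+           OutputStream out =
+               codec.createOutputStream(fs.create(new Path("/tmp/data.gz")), compressor);
+           out.write("hello".getBytes("UTF-8"));
+           out.close();
+         } finally {
+           CodecPool.returnCompressor(compressor);  // hand it back for later callers
+         }
+       }
+     }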
+ @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-lzo library is loaded & initialized; + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lzo compression/decompression pair. + http://www.oberhumer.com/opensource/lzo/]]> + + + + + + + + + + + + + + + + + + + + + + + lzo compression/decompression pair compatible with lzop. + http://www.lzop.org/]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FIXME: This array should be in a private or package private location, + since it could be modified by malicious code. +

    ]]> +
    + + + + This interface is public for historical purposes. You should have no need to + use it. +

    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Although BZip2 headers are marked with the magic "Bz" this + constructor expects the next byte in the stream to be the first one after + the magic. Thus callers have to skip the first two bytes. Otherwise this + constructor will throw an exception. +

    + + @throws IOException + if the stream content is malformed or an I/O error occurs. + @throws NullPointerException + if in == null]]> +
    +
    + + + + + + + + + + + + + + + The decompression requires large amounts of memory. Thus you should call the + {@link #close() close()} method as soon as possible, to force + CBZip2InputStream to release the allocated memory. See + {@link CBZip2OutputStream CBZip2OutputStream} for information about memory + usage. +

    + +

    + CBZip2InputStream reads bytes from the compressed source stream via + the single byte {@link java.io.InputStream#read() read()} method exclusively. + Thus you should consider to use a buffered source stream. +

    + +

    + Instances of this class are not threadsafe. +

    ]]> +
    +
    + + + + + + + + CBZip2OutputStream with a blocksize of 900k. + +

+ Attention: The caller is responsible for writing the two BZip2 magic + bytes "BZ" to the specified stream prior to calling this + constructor. +

    + + @param out * + the destination stream. + + @throws IOException + if an I/O error occurs in the specified stream. + @throws NullPointerException + if out == null.]]> +
    +
    + + + + CBZip2OutputStream with specified blocksize. + +

+ Attention: The caller is responsible for writing the two BZip2 magic + bytes "BZ" to the specified stream prior to calling this + constructor. +

    + + + @param out + the destination stream. + @param blockSize + the blockSize as 100k units. + + @throws IOException + if an I/O error occurs in the specified stream. + @throws IllegalArgumentException + if (blockSize < 1) || (blockSize > 9). + @throws NullPointerException + if out == null. + + @see #MIN_BLOCKSIZE + @see #MAX_BLOCKSIZE]]> +
    +
    + + + + + + + + + + + + + inputLength this method returns MAX_BLOCKSIZE + always. + + @param inputLength + The length of the data which will be compressed by + CBZip2OutputStream.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + == 1.]]> + + + + + == 9.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If you are ever unlucky/improbable enough to get a stack overflow whilst + sorting, increase the following constant and try again. In practice I + have never seen the stack go above 27 elems, so the following limit seems + very generous. +

    ]]> +
    +
    + + + The compression requires large amounts of memory. Thus you should call the + {@link #close() close()} method as soon as possible, to force + CBZip2OutputStream to release the allocated memory. +

    + +

    + You can shrink the amount of allocated memory and maybe raise the compression + speed by choosing a lower blocksize, which in turn may cause a lower + compression ratio. You can avoid unnecessary memory allocation by avoiding + using a blocksize which is bigger than the size of the input. +

    + +

    + You can compute the memory usage for compressing by the following formula: +

    + +
    + <code>400k + (9 * blocksize)</code>.
    + 
    + +

    + To get the memory required for decompression by {@link CBZip2InputStream + CBZip2InputStream} use +

    + +
    + <code>65k + (5 * blocksize)</code>.
    + 
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+     Memory usage by blocksize:
+     Blocksize    Compression memory usage    Decompression memory usage
+     100k         1300k                       565k
+     200k         2200k                       1065k
+     300k         3100k                       1565k
+     400k         4000k                       2065k
+     500k         4900k                       2565k
+     600k         5800k                       3065k
+     700k         6700k                       3565k
+     800k         7600k                       4065k
+     900k         8500k                       4565k
    + +

    + For decompression CBZip2InputStream allocates less memory if the + bzipped input is smaller than one block. +

    + +

    + Instances of this class are not threadsafe. +

    + +

    + TODO: Update to BZip2 1.0.1 +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + true if lzo compressors are loaded & initialized, + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if lzo decompressors are loaded & initialized, + else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-zlib is loaded & initialized + and can be loaded for this job, else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Keep trying a limited number of times, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
    + + + + + + + Keep trying for a maximum time, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
+ + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime multiplied by the number of tries so far. +

    ]]> +
    +
+ + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime multiplied by a random + number in the range of [0, 2 to the number of retries) +

    ]]> +
    +
    + + + + + + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + + + A retry policy for RemoteException + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + Try once, and fail by re-throwing the exception. + This corresponds to having no retry mechanism in place. +

    ]]> +
    +
    + + + + Try once, and fail silently for void methods, or by + re-throwing the exception for non-void methods. +

    ]]> +
    +
    + + + + Keep trying forever. +

    ]]> +
    +
    + + + A collection of useful implementations of {@link RetryPolicy}. +

    ]]> +
    +
    + + + + + + + + + + Determines whether the framework should retry a + method for the given exception, and the number + of retries that have been made for that operation + so far. +

    + @param e The exception that caused the method to fail. + @param retries The number of times the method has been retried. + @return true if the method should be retried, + false if the method should not be retried + but shouldn't fail with an exception (only for void methods). + @throws Exception The re-thrown exception e indicating + that the method failed and should not be retried further.]]> +
    +
    + + + Specifies a policy for retrying method failures. + Implementations of this interface should be immutable. +

    ]]> +
    +
    + + + + + + + + + + + + Create a proxy for an interface of an implementation class + using the same retry policy for each method in the interface. +

+ @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param retryPolicy the policy for retrying method call failures + @return the retry proxy]]> +
    +
+ + + + + + + Create a proxy for an interface of an implementation class + using a set of retry policies specified by method name. + If no retry policy is defined for a method then a default of + {@link RetryPolicies#TRY_ONCE_THEN_FAIL} is used. +

    + @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param methodNameToPolicyMap a map of method names to retry policies + @return the retry proxy]]> +
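A hedged sketch of the proxy creation documented above, wrapping a hypothetical application interface so that every method is retried a bounded number of times; the policy parameters are arbitrary:
+     import java.io.IOException;
+     import java.util.concurrent.TimeUnit;
+     import org.apache.hadoop.io.retry.RetryPolicies;
+     import org.apache.hadoop.io.retry.RetryPolicy;
+     import org.apache.hadoop.io.retry.RetryProxy;
+
+     public class RetryExample {
+       // Hypothetical application interface whose calls may fail transiently.
+       public interface FlakyStore {
+         String fetch(String key) throws IOException;
+       }
+
+       public static FlakyStore wrap(FlakyStore rawStore) {
+         RetryPolicy policy =
+             RetryPolicies.retryUpToMaximumCountWithFixedSleep(5, 2, TimeUnit.SECONDS);
+         // Every method of the returned proxy applies the same policy.
+         return (FlakyStore) RetryProxy.create(FlakyStore.class, rawStore, policy);
+       }
+     }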
    +
    + + + A factory for creating retry proxies. +

    ]]> +
    +
    + +
    + + + + + + + + Prepare the deserializer for reading.

    ]]> +
    +
    + + + + + + Deserialize the next object from the underlying input stream. + If the object t is non-null then this deserializer + may set its internal state to the next object read from the input + stream. Otherwise, if the object t is null a new + deserialized object will be created. +

    + @return the deserialized object]]> +
    +
    + + + + Close the underlying input stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for deserializing objects of type from an + {@link InputStream}. +

    + +

    + Deserializers are stateful, but must not buffer the input since + other producers may read from the input between calls to + {@link #deserialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link Deserializer} to deserialize + the objects to be compared so that the standard {@link Comparator} can + be used to compare them. +

    +

    + One may optimize compare-intensive operations by using a custom + implementation of {@link RawComparator} that operates directly + on byte representations. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + Encapsulates a {@link Serializer}/{@link Deserializer} pair. +

    + @param ]]> +
    +
    + + + + + + + Serializations are found by reading the io.serializations + property from conf, which is a comma-delimited list of + classnames. +

    ]]> +
    +
    + + + + + + + + + + + + A factory for {@link Serialization}s. +

    ]]> +
    +
    + + + + + + + + Prepare the serializer for writing.

    ]]> +
    +
    + + + + + Serialize t to the underlying output stream.

    ]]> +
    +
    + + + + Close the underlying output stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for serializing objects of type to an + {@link OutputStream}. +

    + +

    + Serializers are stateful, but must not buffer the output since + other producers may write to the output between calls to + {@link #serialize(Object)}. +

    + @param ]]> +
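A sketch tying the serialization pieces together: the factory reads io.serializations from the configuration, hands back a Serializer for the requested class, and the caller drives the open/serialize/close protocol described above. Text is used only because WritableSerialization is registered by default; the byte-array destination is purely for illustration:
+     import java.io.ByteArrayOutputStream;
+     import org.apache.hadoop.conf.Configuration;
+     import org.apache.hadoop.io.Text;
+     import org.apache.hadoop.io.serializer.SerializationFactory;
+     import org.apache.hadoop.io.serializer.Serializer;
+
+     public class SerializationExample {
+       public static byte[] toBytes(Text record) throws Exception {
+         Configuration conf = new Configuration();  // default io.serializations covers Writable types
+         SerializationFactory factory = new SerializationFactory(conf);
+         Serializer<Text> serializer = factory.getSerializer(Text.class);
+         ByteArrayOutputStream out = new ByteArrayOutputStream();
+         serializer.open(out);
+         serializer.serialize(record);
+         serializer.close();
+         return out.toByteArray();
+       }
+     }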
    +
    + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + param, to the IPC server running at + address, returning the value. Throws exceptions if there are + network problems or if the remote code threw an exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unwraps any IOException. + + @param lookupTypes the desired exception class. + @return IOException, which is either the lookupClass exception or this.]]> + + + + + This unwraps any Throwable that has a constructor taking + a String as a parameter. + Otherwise it returns this. + + @return Throwable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + protocol is a Java interface. All parameters and return types must + be one of: + +
+     - a primitive type, boolean, byte, char, short, int, long, float, double, or void; or
+     - a {@link String}; or
+     - a {@link Writable}; or
+     - an array of the above types
    + + All methods in the protocol should throw only IOException. No field data of + the protocol instance is transmitted.]]> +
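A hedged sketch of a protocol interface that satisfies those constraints, together with a client-side proxy lookup in the shape of the 2.x RPC client API; the interface, address, port, and version number are made up for illustration:
+     import java.io.IOException;
+     import java.net.InetSocketAddress;
+     import org.apache.hadoop.conf.Configuration;
+     import org.apache.hadoop.io.IntWritable;
+     import org.apache.hadoop.io.Text;
+     import org.apache.hadoop.ipc.RPC;
+
+     public class RpcSketch {
+       // Only primitives, String, Writable, and arrays of these appear; all methods throw IOException.
+       public interface LookupProtocol {
+         long versionID = 1L;  // hypothetical protocol version
+         Text lookup(IntWritable id) throws IOException;
+       }
+
+       public static Text lookupRemotely(int id) throws IOException {
+         Configuration conf = new Configuration();
+         LookupProtocol proxy = RPC.getProxy(LookupProtocol.class, LookupProtocol.versionID,
+             new InetSocketAddress("localhost", 9090), conf);
+         try {
+           return proxy.lookup(new IntWritable(id));
+         } finally {
+           RPC.stopProxy(proxy);
+         }
+       }
+     }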
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + handlerCount determines + the number of handler threads that will be used to process calls.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #rpcQueueTime}.inc(time)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        rpc.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker, + as {@link JobTracker.State} + + @return the current state of the JobTracker.]]> + + + + + + + + + + + + ClusterStatus provides clients with information such as: +

      +
+     - Size of the cluster.
+     - Task capacity of the cluster.
+     - The number of currently running map & reduce tasks.
+     - State of the JobTracker.

    + +

    Clients can query for the latest ClusterStatus, via + {@link JobClient#getClusterStatus()}.

    + + @see JobClient]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter can be of + any {@link Enum} type.

    + +

    Counters are bunched into {@link Group}s, each comprising of + counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Group of counters, comprising of counters from a particular + counter {@link Enum} class. + +

Group handles localization of the class name and the + counter names.

    ]]> +
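A sketch of both sides of a counter's life, assuming a hypothetical enum: a task bumps it through the Reporter, and the driver reads it back from the finished job's Counters, grouped by the enum's class as described above:
+     import org.apache.hadoop.mapred.Counters;
+     import org.apache.hadoop.mapred.JobClient;
+     import org.apache.hadoop.mapred.JobConf;
+     import org.apache.hadoop.mapred.Reporter;
+     import org.apache.hadoop.mapred.RunningJob;
+
+     public class CounterSketch {
+       // Hypothetical application counters; the enum class becomes the counter Group.
+       public enum MyCounter { MALFORMED_RECORDS, GOOD_RECORDS }
+
+       // Called from inside a map() or reduce() implementation:
+       static void countRecord(Reporter reporter, boolean malformed) {
+         reporter.incrCounter(malformed ? MyCounter.MALFORMED_RECORDS
+                                        : MyCounter.GOOD_RECORDS, 1);
+       }
+
+       // In the driver, after the job finishes:
+       static long malformedCount(JobConf job) throws Exception {
+         RunningJob running = JobClient.runJob(job);
+         Counters counters = running.getCounters();
+         return counters.getCounter(MyCounter.MALFORMED_RECORDS);
+       }
+     }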
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param fs the file system that the file is on + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobConf, int)}. + Subclasses of FileInputFormat can also override the + {@link #isSplitable(FileSystem, Path)} method to ensure input-files are + not split-up and are processed as a whole by {@link Mapper}s.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

Note: The following is valid only if the {@link OutputCommitter} + is {@link FileOutputCommitter}. If OutputCommitter is not + a FileOutputCommitter, the task's temporary output + directory is the same as {@link #getOutputPath(JobConf)} i.e. + ${mapred.output.dir}

    + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapred.output.dir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapred.output.dir}/_temporary/_${taskid} (only) + are promoted to ${mapred.output.dir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

The application-writer can take advantage of this by creating any + side-files required in ${mapred.work.output.dir} during execution + of the reduce-task, i.e. via {@link #getWorkOutputPath(JobConf)}, and the + framework will move them out similarly - thus the writer doesn't have to pick + unique paths per task-attempt.

    + +

    Note: the value of ${mapred.work.output.dir} during + execution of a particular task-attempt is actually + ${mapred.output.dir}/_temporary/_{$taskid}, and this value is + set by the map-reduce framework. So, just create any side-files in the + path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce + task to take advantage of this feature.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
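As an editorial aside, the following is a minimal sketch (not part of the original javadoc) of a reduce task writing a side-file into ${mapred.work.output.dir} through the old org.apache.hadoop.mapred API. It assumes the default FileOutputCommitter, so the file is promoted to ${mapred.output.dir} only when the task-attempt commits; the class and file names are illustrative.

    import java.io.IOException;
    import java.util.Iterator;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reducer;
    import org.apache.hadoop.mapred.Reporter;

    public class SideFileReducer extends MapReduceBase
        implements Reducer<Text, Text, Text, Text> {

      private JobConf conf;

      public void configure(JobConf job) {
        this.conf = job;
      }

      public void reduce(Text key, Iterator<Text> values,
                         OutputCollector<Text, Text> output, Reporter reporter)
          throws IOException {
        // ${mapred.work.output.dir} for this task-attempt; files created here are
        // promoted to ${mapred.output.dir} only if the attempt commits successfully.
        Path workDir = FileOutputFormat.getWorkOutputPath(conf);
        FileSystem fs = workDir.getFileSystem(conf);
        FSDataOutputStream side = fs.create(new Path(workDir, "side-" + key));
        try {
          while (values.hasNext()) {
            side.writeBytes(values.next().toString() + "\n");
          }
        } finally {
          side.close();
        }
        output.collect(key, key);
      }
    }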
    +
    + + + + + + + + + + + + + The generated name can be used to create custom files from within the + different tasks for the job, the names for different tasks will not collide + with each other.

    + +

The given name is postfixed with the task type, 'm' for maps, 'r' for + reduces, and the task partition number. For example, given a name 'test' + running on the first map of the job, the generated name will be + 'test-m-00000'.

+ + @param conf the configuration for the job. + @param name the name to make unique. + @return a unique name across all tasks of the job.]]> +
    +
    + + + + + The path can be used to create custom files from within the map and + reduce tasks. The path name will be unique for each task. The path parent + will be the job output directory.

+ +

    This method uses the {@link #getUniqueName} method to make the file name + unique for the task.

+ + @param conf the configuration for the job. + @param name the name for the file. + @return a unique path across all tasks of the job.]]> +
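For illustration only, a tiny sketch of how the helpers above fit together; it assumes it is called from code running inside a task, where the task's JobConf is available, and the name "histogram" is made up.

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobConf;

    public class CustomFileNaming {
      // For the first map task this resolves to something like
      // <job-output-dir>/histogram-m-00000, unique across all tasks of the job.
      public static Path histogramPath(JobConf conf) {
        return FileOutputFormat.getPathForCustomFile(conf, "histogram");
      }
    }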
    +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For example, a split could + be an <input-file-path, start, offset> tuple. + + @param job job configuration. + @param numSplits the desired number of splits, a hint. + @return an array of {@link InputSplit}s for the job.]]> + + + + + + + + + It is the responsibility of the RecordReader to respect + record boundaries while processing the logical split to present a + record-oriented view to the individual task.

    + + @param split the {@link InputSplit} + @param job the job that this split belongs to + @return a {@link RecordReader}]]> +
    +
    + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
    1. + Validate the input-specification of the job. +
    2. + Split-up the input file(s) into logical {@link InputSplit}s, each of + which is then assigned to an individual {@link Mapper}. +
    3. +
    4. + Provide the {@link RecordReader} implementation to be used to glean + input records from the logical InputSplit for processing by + the {@link Mapper}. +
    5. +
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

Clearly, logical splits based on input-size are insufficient for many + applications since record boundaries are to be respected. In such cases, the + application has to also implement a {@link RecordReader} on whom lies the + responsibility to respect record-boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see JobClient + @see FileInputFormat]]> + + + + + + + + + + InputSplit. + + @return the number of bytes in the input split. + @throws IOException]]> + + + + + + InputSplit is + located as an array of Strings. + @throws IOException]]> + + + + InputSplit represents the data to be processed by an + individual {@link Mapper}. + +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + jobid doesn't correspond to any known job. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient is the primary interface for the user-job to interact + with the {@link JobTracker}. + + JobClient provides facilities to submit jobs, track their + progress, access component-tasks' reports/logs, get the Map-Reduce cluster + status information etc. + +

    The job submission process involves: +

      +
    1. + Checking the input and output specifications of the job. +
    2. +
    3. + Computing the {@link InputSplit}s for the job. +
    4. +
    5. + Setup the requisite accounting information for the {@link DistributedCache} + of the job, if necessary. +
    6. +
    7. + Copying the job's jar and configuration to the map-reduce system directory + on the distributed file-system. +
    8. +
9. + Submitting the job to the JobTracker and optionally monitoring + its status. +
    10. +

    + + Normally the user creates the application, describes various facets of the + job via {@link JobConf} and then uses the JobClient to submit + the job and monitor its progress. + +

    Here is an example on how to use JobClient:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     job.setInputPath(new Path("in"));
    +     job.setOutputPath(new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +
    +     // Submit the job, then poll for progress until the job is complete
    +     JobClient.runJob(job);
    + 

    + +

    Job Control

    + +

At times clients would chain map-reduce jobs to accomplish complex tasks + which cannot be done via a single map-reduce job. This is fairly easy since + the output of the job, typically, goes to the distributed file-system and that + can be used as the input for the next job.

    + +

However, this also means that the onus of ensuring jobs are complete + (success/failure) lies squarely on the clients. In such situations the + various job-control options are: +

      +
    1. + {@link #runJob(JobConf)} : submits the job and returns only after + the job has completed. +
    2. +
    3. + {@link #submitJob(JobConf)} : only submits the job, then poll the + returned handle to the {@link RunningJob} to query status and make + scheduling decisions. +
    4. +
    5. + {@link JobConf#setJobEndNotificationURI(String)} : setup a notification + on job-completion, thus avoiding polling. +
    6. +

    + + @see JobConf + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
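A short editorial sketch of job-control option 2 above, submitting asynchronously and polling the RunningJob handle; the class name, status format and 5-second poll interval are illustrative choices, not part of the API.

    import java.io.IOException;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.RunningJob;

    public class SubmitAndPoll {
      public static void main(String[] args) throws IOException, InterruptedException {
        JobConf job = new JobConf(SubmitAndPoll.class);
        job.setJobName("myjob");
        // ... set input/output paths, mapper and reducer classes as in the example above ...

        JobClient client = new JobClient(job);
        RunningJob running = client.submitJob(job);   // returns immediately
        while (!running.isComplete()) {
          System.out.printf("map %3.0f%%  reduce %3.0f%%%n",
              running.mapProgress() * 100, running.reduceProgress() * 100);
          Thread.sleep(5000);                         // poll every 5 seconds
        }
        System.out.println(running.isSuccessful() ? "job succeeded" : "job failed");
      }
    }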
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If the parameter {@code loadDefaults} is false, the new instance + will not load resources from the default files. + + @param loadDefaults specifies whether to load from the default files]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if framework should keep the intermediate files + for failed tasks, false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the outputs of the maps are to be compressed, + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This comparator should be provided if the equivalence rules for keys + for sorting the intermediates are different from those for grouping keys + before each call to + {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.

    + +

    For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed + in a single call to the reduce function if K1 and K2 compare as equal.

    + +

    Since {@link #setOutputKeyComparatorClass(Class)} can be used to control + how keys are sorted, this can be used in conjunction to simulate + secondary sort on values.

    + +

    Note: This is not a guarantee of the reduce sort being + stable in any sense. (In any case, with the order of available + map-outputs to the reduce being non-deterministic, it wouldn't make + that much sense.)

    + + @param theClass the comparator class to be used for grouping keys. + It should implement RawComparator. + @see #setOutputKeyComparatorClass(Class)]]> +
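To make the secondary-sort idea concrete, here is a hedged sketch of the wiring. The composite key is assumed to be a Text of the form "primary\tsecondary", and PrimaryGroupingComparator is an illustrative class written for this example, not something provided by the framework.

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;
    import org.apache.hadoop.mapred.JobConf;

    public class SecondarySortWiring {

      // Groups reduce() calls by the primary part of the composite key only.
      public static class PrimaryGroupingComparator extends WritableComparator {
        public PrimaryGroupingComparator() {
          super(Text.class, true);
        }
        public int compare(WritableComparable a, WritableComparable b) {
          String ka = a.toString().split("\t", 2)[0];
          String kb = b.toString().split("\t", 2)[0];
          return ka.compareTo(kb);
        }
      }

      public static void configure(JobConf job) {
        // Sort on the full composite key (primary, then secondary) ...
        job.setOutputKeyComparatorClass(Text.Comparator.class);
        // ... but group by the primary part only, so a single reduce() call sees
        // all values of a primary key with the values arriving in secondary order.
        job.setOutputValueGroupingComparator(PrimaryGroupingComparator.class);
      }
    }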
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. Typically the combiner is same as the + the {@link Reducer} for the job i.e. {@link #getReducerClass()}. + + @return the user-defined combiner class used to combine map-outputs.]]> + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. + +

    The combiner is a task-level aggregation operation which, in some cases, + helps to cut down the amount of data transferred from the {@link Mapper} to + the {@link Reducer}, leading to better performance.

    + +

Typically the combiner is the same as the Reducer for the + job, i.e. {@link #setReducerClass(Class)}.

    + + @param theClass the user-defined combiner class used to combine + map-outputs.]]> +
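As a small, hedged illustration of reusing the reducer as the combiner, the sketch below wires the stock TokenCountMapper/LongSumReducer pair from org.apache.hadoop.mapred.lib; summation is associative and commutative, which is what makes the reuse safe.

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.lib.LongSumReducer;
    import org.apache.hadoop.mapred.lib.TokenCountMapper;

    public class WordCountWiring {
      public static void configure(JobConf job) {
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setMapperClass(TokenCountMapper.class);
        // The reducer doubles as a map-side combiner to shrink the shuffled data.
        job.setCombinerClass(LongSumReducer.class);
        job.setReducerClass(LongSumReducer.class);
      }
    }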
    +
    + + + true. + + @return true if speculative execution be used for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on, else false.]]> + + + + + true. + + @return true if speculative execution be + used for this job for map tasks, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for map tasks, + else false.]]> + + + + + true. + + @return true if speculative execution be used + for reduce tasks for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for reduce tasks, + else false.]]> + + + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + Note: This is only a hint to the framework. The actual + number of spawned map tasks depends on the number of {@link InputSplit}s + generated by the job's {@link InputFormat#getSplits(JobConf, int)}. + + A custom {@link InputFormat} is typically used to accurately control + the number of map tasks for the job.

    + +

    How many maps?

    + +

    The number of maps is usually driven by the total size of the inputs + i.e. total number of blocks of the input files.

    + +

The right level of parallelism for maps seems to be around 10-100 maps + per-node, although it has been set up to 300 or so for very cpu-light map + tasks. Task setup takes a while, so it is best if the maps take at least a + minute to execute.

    + +

    The default behavior of file-based {@link InputFormat}s is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of input files. However, the {@link FileSystem} blocksize of the + input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

    Thus, if you expect 10TB of input data and have a blocksize of 128MB, + you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is + used to set it even higher.

    + + @param n the number of map tasks for this job. + @see InputFormat#getSplits(JobConf, int) + @see FileInputFormat + @see FileSystem#getDefaultBlockSize() + @see FileStatus#getBlockSize()]]> +
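A worked version of the arithmetic above, as an editorial sketch only; the figures are the ones quoted in the text, and setNumMapTasks remains just a hint to the framework.

    import org.apache.hadoop.mapred.JobConf;

    public class MapCountHint {
      public static void hint(JobConf job) {
        long inputBytes = 10L * 1024 * 1024 * 1024 * 1024;  // 10 TB of input
        long blockSize  = 128L * 1024 * 1024;               // 128 MB HDFS blocks
        long splits     = inputBytes / blockSize;           // 81,920 block-sized splits
        // Only effective if set higher than the number of splits generated above.
        job.setNumMapTasks((int) splits);
      }
    }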
    +
    + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + How many reduces? + +

    The right number of reduces seems to be 0.95 or + 1.75 multiplied by (<no. of nodes> * + + mapred.tasktracker.reduce.tasks.maximum). +

    + +

With 0.95 all of the reduces can launch immediately and + start transferring map outputs as the maps finish. With 1.75 + the faster nodes will finish their first round of reduces and launch a + second wave of reduces doing a much better job of load balancing.

    + +

Increasing the number of reduces increases the framework overhead, but + improves load balancing and lowers the cost of failures.

    + +

    The scaling factors above are slightly less than whole numbers to + reserve a few reduce slots in the framework for speculative-tasks, failures + etc.

    + +

    Reducer NONE

    + +

    It is legal to set the number of reduce-tasks to zero.

    + +

In this case the output of the map-tasks goes directly to the distributed + file-system, to the path set by + {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the + framework doesn't sort the map-outputs before writing them out to HDFS.

    + + @param n the number of reduce tasks for this job.]]> +
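An editorial sketch of the 0.95 rule of thumb above; obtaining the cluster's reduce-slot capacity through ClusterStatus is one possible approach, not something prescribed by this javadoc.

    import java.io.IOException;
    import org.apache.hadoop.mapred.ClusterStatus;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;

    public class ReduceCountHeuristic {
      public static void configure(JobConf job) throws IOException {
        ClusterStatus cluster = new JobClient(job).getClusterStatus();
        // 0.95 * total reduce-slot capacity: every reduce can start in one wave.
        int reduces = (int) (0.95 * cluster.getMaxReduceTasks());
        job.setNumReduceTasks(reduces);
      }
    }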
    +
    + + + mapred.map.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per map task.]]> + + + + + + + + + + + mapred.reduce.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per reduce task.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + noFailures, the + tasktracker is blacklisted for this job. + + @param noFailures maximum no. of failures of a given job per tasktracker.]]> + + + + + blacklisted for this job. + + @return the maximum no. of failures of a given job per tasktracker.]]> + + + + + failed. + + Defaults to zero, i.e. any failed map-task results in + the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + failed. + + Defaults to zero, i.e. any failed reduce-task results + in the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The debug script can aid debugging of failed map tasks. The script is + given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the map failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setMapDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param mDbgScript the script name]]> +
    +
    + + + + + + + + + The debug script can aid debugging of failed reduce tasks. The script + is given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

The debug command, run on the node where the reduce failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script file needs to be symlinked

    + +

    Here is an example on how to submit a script +

    + job.setReduceDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param rDbgScript the script name]]> +
    +
    + + + + + + + + null if it hasn't + been set. + @see #setJobEndNotificationURI(String)]]> + + + + + + The uri can contain 2 special parameters: $jobId and + $jobStatus. Those, if present, are replaced by the job's + identifier and completion-status respectively.

    + +

    This is typically used by application-writers to implement chaining of + Map-Reduce jobs in an asynchronous manner.

    + + @param uri the job end notification uri + @see JobStatus + @see Job Completion and Chaining]]> +
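A minimal sketch of registering such a callback; the host, path and query-string layout are illustrative, only the $jobId and $jobStatus placeholders come from the description above.

    import org.apache.hadoop.mapred.JobConf;

    public class NotificationWiring {
      public static void configure(JobConf job) {
        // The framework substitutes the job id and final status before the callback.
        job.setJobEndNotificationURI(
            "http://workflow.example.com/notify?jobid=$jobId&status=$jobStatus");
      }
    }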
    +
    + + + + When a job starts, a shared directory is created at location + + ${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ . + This directory is exposed to the users through + job.local.dir . + So, the tasks can use this space + as scratch space and share files among them.

    + This value is available as System property also. + + @return The localized job specific shared directory]]> +
    +
    + + + + + + + + + + + + + + + + + + JobConf is the primary interface for a user to describe a + map-reduce job to the Hadoop framework for execution. The framework tries to + faithfully execute the job as-is described by JobConf, however: +
      +
    1. + Some configuration parameters might have been marked as + + final by administrators and hence cannot be altered. +
    2. +
3. + While some job parameters are straightforward to set + (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly with the + rest of the framework and/or job-configuration and are relatively more + complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}). +
    4. +

    + +

    JobConf typically specifies the {@link Mapper}, combiner + (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and + {@link OutputFormat} implementations to be used etc. + +

Optionally JobConf is used to specify other advanced facets + of the job such as Comparators to be used, files to be put in + the {@link DistributedCache}, whether or not intermediate and/or job outputs + are to be compressed (and how), debuggability via user-provided scripts + ({@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}) + for doing post-processing on task logs, the task's stdout, stderr, syslog, etc.

    + +

    Here is an example on how to configure a job via JobConf:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     FileInputFormat.setInputPaths(job, new Path("in"));
    +     FileOutputFormat.setOutputPath(job, new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setCombinerClass(MyJob.MyReducer.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +     
    +     job.setInputFormat(SequenceFileInputFormat.class);
    +     job.setOutputFormat(SequenceFileOutputFormat.class);
    + 

    + + @see JobClient + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + any job + run on the jobtracker started at 200707121733, we would use : +
     
    + JobID.getTaskIDsPattern("200707121733", null);
    + 
    + which will return : +
     "job_200707121733_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @return a regex pattern matching JobIDs]]> +
    +
    + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID + @see JobTracker#getNewJobId() + @see JobTracker#getStartTime()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "N/A" + + @return Scheduling information associated to particular Job Queue]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -archives + -files inputjar args]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + zero. + + @param conf configuration for the JobTracker. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Output pairs need not be of the same types as input pairs. A given + input pair may map to zero or many output pairs. Output pairs are + collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the input key. + @param value the input value. + @param output collects mapped keys and values. + @param reporter facility to report progress.]]> +
+ + + Maps are the individual tasks which transform input records into + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link JobConf} for the + job via the {@link JobConfigurable#configure(JobConf)} and initialize + themselves. Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    The framework then calls + {@link #map(Object, Object, OutputCollector, Reporter)} + for each key/value pair in the InputSplit for that task.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the grouping by specifying + a Comparator via + {@link JobConf#setOutputKeyComparatorClass(Class)}.

    + +

    The grouped Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    The intermediate, grouped outputs are always stored in + {@link SequenceFile}s. Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the JobConf.

    + +

    If the job has + zero + reduces then the output of the Mapper is directly written + to the {@link FileSystem} without grouping by keys.

    + +

    Example:

    +

    +     public class MyMapper<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Mapper<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +       
    +       private String mapTaskId;
    +       private String inputFile;
    +       private int noRecords = 0;
    +       
    +       public void configure(JobConf job) {
    +         mapTaskId = job.get("mapred.task.id");
    +         inputFile = job.get("mapred.input.file");
    +       }
    +       
    +       public void map(K key, V val,
    +                       OutputCollector<K, V> output, Reporter reporter)
    +       throws IOException {
    +         // Process the <key, value> pair (assume this takes a while)
    +         // ...
    +         // ...
    +         
    +         // Let the framework know that we are alive, and kicking!
    +         // reporter.progress();
    +         
    +         // Process some more
    +         // ...
    +         // ...
    +         
    +         // Increment the no. of <key, value> pairs processed
    +         ++noRecords;
    +
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +        
    +         // Every 100 records update application-level status
    +         if ((noRecords%100) == 0) {
    +           reporter.setStatus(mapTaskId + " processed " + noRecords + 
    +                              " from input-file: " + inputFile); 
    +         }
    +         
    +         // Output the result
    +         output.collect(key, val);
    +       }
    +     }
    + 

    + +

    Applications may write a custom {@link MapRunnable} to exert greater + control on map processing e.g. multi-threaded Mappers etc.

    + + @see JobConf + @see InputFormat + @see Partitioner + @see Reducer + @see MapReduceBase + @see MapRunnable + @see SequenceFile]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + Provides default no-op implementations for a few methods, most non-trivial + applications need to override some of them.

    ]]> +
    +
    + + + + + + + + + + + <key, value> pairs. + +

    Mapping of input records to output records is complete when this method + returns.

    + + @param input the {@link RecordReader} to read the input records. + @param output the {@link OutputCollector} to collect the outputrecords. + @param reporter {@link Reporter} to report progress, status-updates etc. + @throws IOException]]> +
    +
    + + Custom implementations of MapRunnable can exert greater + control on map processing e.g. multi-threaded, asynchronous mappers etc.

    + + @see Mapper]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nearly + equal content length.
    + Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} + to construct RecordReader's for MultiFileSplit's. + @see MultiFileSplit]]> +
    +
    + + + + + + + + + + + + + + + + + th Path]]> + + + + + + + + + + + th Path]]> + + + + + + + + + + + + + + + + + + + + + + + MultiFileSplit can be used to implement {@link RecordReader}'s, with + reading one record per file. + @see FileSplit + @see MultiFileInputFormat]]> + + + + + + + + + + + + + + + <key, value> pairs output by {@link Mapper}s + and {@link Reducer}s. + +

    OutputCollector is the generalization of the facility + provided by the Map-Reduce framework to collect data output by either the + Mapper or the Reducer i.e. intermediate outputs + or the output of the job.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OutputCommitter describes the commit of task output for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputCommitter of + the job to:

    +

      +
    1. + Setup the job during initialization. For example, create the temporary + output directory for the job during the initialization of the job. +
    2. +
    3. + Cleanup the job after the job completion. For example, remove the + temporary output directory after the job completion. +
    4. +
    5. + Setup the task temporary output. +
    6. +
    7. + Check whether a task needs a commit. This is to avoid the commit + procedure if a task does not need commit. +
    8. +
    9. + Commit of the task output. +
    10. +
    11. + Discard the task commit. +
    12. +
    + + @see FileOutputCommitter + @see JobContext + @see TaskAttemptContext]]> +
    +
    + + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when it is + a job is submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + @param ignored + @param job job configuration. + @throws IOException when output should not be attempted]]> +
    +
    + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
    1. + Validate the output-specification of the job. For e.g. check that the + output directory doesn't already exist. +
    2. + Provide the {@link RecordWriter} implementation to be used to write out + the output files of the job. Output files are stored in a + {@link FileSystem}. +
    3. +
    + + @see RecordWriter + @see JobConf]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + Typically a hash function on a all or a subset of the key.

+ + @param key the key to be partitioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + + @see Reducer]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.0 to 1.0. + @throws IOException]]> + + + + RecordReader reads <key, value> pairs from an + {@link InputSplit}. + +

RecordReader, typically, converts the byte-oriented view of + the input, provided by the InputSplit, and presents a + record-oriented view for the {@link Mapper} & {@link Reducer} tasks for + processing. It thus assumes the responsibility of processing record + boundaries and presenting the tasks with keys and values.

    + + @see InputSplit + @see InputFormat]]> +
    +
    + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param reporter facility to report progress. + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + Reduces values for a given key. + +

    The framework calls this method for each + <key, (list of values)> pair in the grouped inputs. + Output values must be of the same type as input values. Input keys must + not be altered. The framework will reuse the key and value objects + that are passed into the reduce, therefore the application should clone + the objects they want to keep a copy of. In many cases, all values are + combined into zero or one value. +

    + +

    Output pairs are collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the key. + @param values the list of values to reduce. + @param output to collect keys and combined values. + @param reporter facility to report progress.]]> +
    + + + The number of Reducers for the job is set by the user via + {@link JobConf#setNumReduceTasks(int)}. Reducer implementations + can access the {@link JobConf} for the job via the + {@link JobConfigurable#configure(JobConf)} method and initialize themselves. + Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    Reducer has 3 primary phases:

    +
      +
    1. + +

      Shuffle

      + +

Reducer is input the grouped output of a {@link Mapper}. + In this phase the framework, for each Reducer, fetches the + relevant partition of the output of all the Mappers, via HTTP. +

      +
    2. + +
    3. +

      Sort

      + +

      The framework groups Reducer inputs by keys + (since different Mappers may have output the same key) in this + stage.

      + +

      The shuffle and sort phases occur simultaneously i.e. while outputs are + being fetched they are merged.

      + +
      SecondarySort
      + +

If equivalence rules for keys while grouping the intermediates are + different from those for grouping keys before reduction, then one may + specify a Comparator via + {@link JobConf#setOutputValueGroupingComparator(Class)}. Since + {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to + control how intermediate keys are grouped, these can be used in conjunction + to simulate secondary sort on values.

      + + + For example, say that you want to find duplicate web pages and tag them + all with the url of the "best" known example. You would set up the job + like: +
        +
      • Map Input Key: url
      • +
      • Map Input Value: document
      • +
      • Map Output Key: document checksum, url pagerank
      • +
      • Map Output Value: url
      • +
      • Partitioner: by checksum
      • +
      • OutputKeyComparator: by checksum and then decreasing pagerank
      • +
      • OutputValueGroupingComparator: by checksum
      • +
      +
    4. + +
    5. +

      Reduce

      + +

      In this phase the + {@link #reduce(Object, Iterator, OutputCollector, Reporter)} + method is called for each <key, (list of values)> pair in + the grouped inputs.

      +

      The output of the reduce task is typically written to the + {@link FileSystem} via + {@link OutputCollector#collect(Object, Object)}.

      +
    6. +
    + +

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

    +     public class MyReducer<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Reducer<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +        
    +       private String reduceTaskId;
    +       private int noKeys = 0;
    +       
    +       public void configure(JobConf job) {
    +         reduceTaskId = job.get("mapred.task.id");
    +       }
    +       
    +       public void reduce(K key, Iterator<V> values,
    +                          OutputCollector<K, V> output, 
    +                          Reporter reporter)
    +       throws IOException {
    +       
    +         // Process
    +         int noValues = 0;
    +         while (values.hasNext()) {
    +           V value = values.next();
    +           
    +           // Increment the no. of values for this key
    +           ++noValues;
    +           
    +           // Process the <key, value> pair (assume this takes a while)
    +           // ...
    +           // ...
    +           
    +           // Let the framework know that we are alive, and kicking!
    +           if ((noValues%10) == 0) {
    +             reporter.progress();
    +           }
    +         
    +           // Process some more
    +           // ...
    +           // ...
    +           
    +           // Output the <key, value> 
    +           output.collect(key, value);
    +         }
    +         
    +         // Increment the no. of <key, list of values> pairs processed
    +         ++noKeys;
    +         
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +         
    +         // Every 100 keys update application-level status
    +         if ((noKeys%100) == 0) {
    +           reporter.setStatus(reduceTaskId + " processed " + noKeys);
    +         }
    +       }
    +     }
    + 

    + + @see Mapper + @see Partitioner + @see Reporter + @see MapReduceBase]]> +
    +
    + + + + + + + + + + + + + + + Counter of the given group/name.]]> + + + + + + + Enum. + @param amount A non-negative amount by which the counter is to + be incremented.]]> + + + + + + + + + + + + + + InputSplit that the map is reading from. + @throws UnsupportedOperationException if called outside a mapper]]> + + + + + + + + + {@link Mapper} and {@link Reducer} can use the Reporter + provided to report progress or just indicate that they are alive. In + scenarios where the application takes an insignificant amount of time to + process individual key/value pairs, this is crucial since the framework + might assume that the task has timed-out and kill that task. + +

    Applications can also update {@link Counters} via the provided + Reporter .

    + + @see Progressable + @see Counters]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + progress of the job's cleanup-tasks, as a float between 0.0 + and 1.0. When all cleanup tasks have completed, the function returns 1.0. + + @return the progress of the job's cleanup-tasks. + @throws IOException]]> + + + + + + progress of the job's setup-tasks, as a float between 0.0 + and 1.0. When all setup tasks have completed, the function returns 1.0. + + @return the progress of the job's setup-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RunningJob is the user-interface to query for details on a + running Map-Reduce job. + +

    Clients can get hold of RunningJob via the {@link JobClient} + and then query the running-job for details such as name, configuration, + progress etc.

    + + @see JobClient]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This allows the user to specify the key class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + This allows the user to specify the value class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f. The filtering criteria is + MD5(key) % f == 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + f using + the criteria record# % f == 0. + For example, if the frequency is 10, one out of 10 records is returned.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}. + false otherwise.]]> + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}. + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop provides an optional mode of execution in which the bad records + are detected and skipped in further attempts. + +

This feature can be used when map/reduce tasks crash deterministically on + certain input. This happens due to bugs in the map/reduce function. The usual + course would be to fix these bugs. But sometimes this is not possible; + perhaps the bug is in third-party libraries for which the source code is + not available. Due to this, the task never reaches completion even with + multiple attempts and complete data for that task is lost.

    + +

    With this feature, only a small portion of data is lost surrounding + the bad record, which may be acceptable for some user applications. + see {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}

    + +

The skipping mode gets kicked off after a certain number of failures; + see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}

    + +

In the skipping mode, the map/reduce task maintains the record range which + is being processed at all times. Before giving the input to the + map/reduce function, it sends this record range to the TaskTracker. + If the task crashes, the TaskTracker knows which one was the last reported + range. On further attempts that range gets skipped.

    ]]> +
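An editorial sketch of turning the skipping mode on for a job whose mapper fails deterministically on a few records; the thresholds and the skip-output path are illustrative values, and the setSkipOutputPath call is an assumption about the helper documented in this class.

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.SkipBadRecords;

    public class SkippingWiring {
      public static void configure(JobConf job) {
        // Begin skipping mode after two failed attempts of the same task.
        SkipBadRecords.setAttemptsToStartSkipping(job, 2);
        // Tolerate losing at most one record around each bad map input record.
        SkipBadRecords.setMapperMaxSkipRecords(job, 1);
        // Keep the skipped records for later inspection (path is illustrative).
        SkipBadRecords.setSkipOutputPath(job, new Path("_skipped"));
      }
    }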
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all task attempt IDs + of any jobtracker, in any job, of the first + map task, we would use : +
     
    + TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
    + 
    + which will return : +
     "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @param attemptId the task attempt number, or null + @return a regex pattern matching TaskAttemptIDs]]> +
    +
    + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the first map task + of any jobtracker, of any job, we would use : +

     
    + TaskID.getTaskIDsPattern(null, null, true, 1);
    + 
    + which will return : +
     "task_[^_]*_[0-9]*_m_000001*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @return a regex pattern matching TaskIDs]]> +
    + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.log.dir.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the Job was added.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([,]*) + func ::= tbl(,"") + class ::= @see java.lang.Class#forName(java.lang.String) + path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) + } + Reads expression from the mapred.join.expr property and + user-supplied join types from mapred.join.define.<ident> + types. Paths supplied to tbl are given as input paths to the + InputFormat class listed. + @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,

    ) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + mapred.join.define.<ident> to a classname. In the expression + mapred.join.expr, the identifier will be assumed to be a + ComposableRecordReader. + mapred.join.keycomparator can be a classname used to compare keys + in the join. + @see JoinRecordReader + @see MultiFilterRecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + capacity children to position + id in the parent reader. + The id of a root CompositeRecordReader is -1 by convention, but relying + on this is not recommended.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + override(S1,S2,S3) will prefer values + from S3 over S2, and values from S2 over S1 for all keys + emitted from all sources.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [,,...,]]]> + + + + + + + out. + TupleWritable format: + {@code + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

For the added Mapper the configuration given for it, + mapperConf, has precedence over the job's JobConf. This + precedence is in effect when the task is running.

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain +

    + + @param job job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overriden super.configure(...) should be + invoked at the beginning of the overwriter method.]]> + + + + + + + + + + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overriden super.close() should be + invoked at the end of the overwriter method.]]> + + + + + The Mapper classes are invoked in a chained (or piped) fashion, the output of + the first becomes the input of the second, and so on until the last Mapper, + the output of the last Mapper will be written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed in a chain. This enables having + reusable specialized Mappers that can be combined to perform composite + operations within a single task. +

Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reducer in the chain use matching output and input key and + value classes as no conversion is done by the chaining code.

Using the ChainMapper and the ChainReducer classes it is possible to compose + Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An + immediate benefit of this pattern is a dramatic reduction in disk IO.

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain. +

    + ChainMapper usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    + 

    + JobConf mapAConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, + Text.class, Text.class, true, mapAConf); +

    + JobConf mapBConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, mapBConf); +

    + JobConf reduceConf = new JobConf(false); + ... + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, + Text.class, Text.class, true, reduceConf); +

    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, null); +

    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, + LongWritable.class, LongWritable.class, true, null); +

    + FileInputFormat.setInputPaths(conf, inDir); + FileOutputFormat.setOutputPath(conf, outDir); + ... +

    + JobClient jc = new JobClient(conf); + RunningJob job = jc.submitJob(conf); + ... +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Reducer leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Reducer does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

    + For the added Reducer the configuration given for it, + reducerConf, has precedence over the job's JobConf. This + precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. + + @param job job's JobConf to add the Reducer class. + @param klass the Reducer class to add. + @param inputKeyClass reducer input key class. + @param inputValueClass reducer input value class. + @param outputKeyClass reducer output key class. + @param outputValueClass reducer output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param reducerConf a JobConf with the configuration for the Reducer + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

    + For the added Mapper the configuration given for it, + mapperConf, has precedence over the job's JobConf. This + precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain + . + + @param job chain job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overriden super.configure(...) should be + invoked at the beginning of the overwriter method.]]> + + + + + + + + + + reduce(...) method of the Reducer with the + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overriden super.close() should be + invoked at the end of the overwriter method.]]> + + + + + For each record output by the Reducer, the Mapper classes are invoked in a + chained (or piped) fashion, the output of the first becomes the input of the + second, and so on until the last Mapper, the output of the last Mapper will + be written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed after the Reducer or in a chain. + This enables having reusable specialized Mappers that can be combined to + perform composite operations within a single task. +

    + Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reducer in the chain use matching output and input key and + value classes, as no conversion is done by the chaining code. +

    + Using the ChainMapper and the ChainReducer classes it is possible to compose + Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An + immediate benefit of this pattern is a dramatic reduction in disk IO. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer; this is done by the setReducer or the addMapper for the last + element in the chain. +

    + ChainReducer usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    + 

    + JobConf mapAConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, + Text.class, Text.class, true, mapAConf); +

    + JobConf mapBConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, mapBConf); +

    + JobConf reduceConf = new JobConf(false); + ... + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, + Text.class, Text.class, true, reduceConf); +

    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, null); +

    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, + LongWritable.class, LongWritable.class, true, null); +

    + FileInputFormat.setInputPaths(conf, inDir); + FileOutputFormat.setOutputPath(conf, outDir); + ... +

    + JobClient jc = new JobClient(conf); + RunningJob job = jc.submitJob(conf); + ... +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + @param freq The frequency with which records will be emitted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + This will read every split at the client, which is very expensive. + @param freq Probability with which a key will be chosen. + @param numSamples Total number of samples to obtain from all selected + splits.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + Takes the first numSamples / numSplits records from each split. + @param numSamples Total number of samples to obtain from all selected + splits.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the name output is multi, false + if it is single. If the name output is not defined it returns + false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @param conf job conf to add the named output + @param namedOutput named output name, it has to be a word, letters + and numbers only, cannot be the word 'part' as + that is reserved for the + default output. + @param outputFormatClass OutputFormat class. + @param keyClass key class + @param valueClass value class]]> + + + + + + + + + + + + @param conf job conf to add the named output + @param namedOutput named output name, it has to be a word, letters + and numbers only, cannot be the word 'part' as + that is reserved for the + default output. + @param outputFormatClass OutputFormat class. + @param keyClass key class + @param valueClass value class]]> + + + + + + + + By default these counters are disabled. +

    + MultipleOutputs supports counters; by default they are disabled. + The counters group is the {@link MultipleOutputs} class name. +

    + The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_', and the multiname. + + @param conf job conf in which to enable or disable the counters. + @param enabled indicates if the counters will be enabled or not.]]> +
    +
    + + + + + By default these counters are disabled. +

    + MultipleOutputs supports counters; by default they are disabled. + The counters group is the {@link MultipleOutputs} class name. +

    + The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_', and the multiname. + + + @param conf job conf to check for the counters setting. + @return TRUE if the counters are enabled, FALSE if they are disabled.]]> +
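    + As a small illustrative sketch of the counter switch described above (it
    + assumes the usual MultipleOutputs.setCountersEnabled/getCountersEnabled
    + helpers and an existing JobConf named conf):
    +
    + // enable per-named-output counters before submitting the job
    + MultipleOutputs.setCountersEnabled(conf, true);
    +
    + // later, for example in a test, verify the setting
    + boolean countersOn = MultipleOutputs.getCountersEnabled(conf);
    +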
    +
    + + + + + + + + + + + + + @param namedOutput the named output name + @param reporter the reporter + @return the output collector for the given named output + @throws IOException thrown if output collector could not be created]]> + + + + + + + + + + + @param namedOutput the named output name + @param multiName the multi name part + @param reporter the reporter + @return the output collector for the given named output + @throws IOException thrown if output collector could not be created]]> + + + + + + + If overriden subclasses must invoke super.close() at the + end of their close() + + @throws java.io.IOException thrown if any of the MultipleOutput files + could not be closed properly.]]> + + + + OutputCollector passed to + the map() and reduce() methods of the + Mapper and Reducer implementations. +

    + Each additional output, or named output, may be configured with its own + OutputFormat, with its own key class and with its own value + class. +

    + A named output can be a single file or a multi file. The latter is referred + to as a multi named output. +

    + A multi named output is an unbound set of files all sharing the same + OutputFormat, key class and value class configuration. +

    + When named outputs are used within a Mapper implementation, + key/values written to a named output are not part of the reduce phase; only + key/values written to the job OutputCollector are part of the + reduce phase. +

    + MultipleOutputs supports counters; by default they are disabled. The counters + group is the {@link MultipleOutputs} class name. +

    + The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_', and the multiname. +

    + Job configuration usage pattern is: +

    +
    + JobConf conf = new JobConf();
    +
    + conf.setInputPath(inDir);
    + FileOutputFormat.setOutputPath(conf, outDir);
    +
    + conf.setMapperClass(MOMap.class);
    + conf.setReducerClass(MOReduce.class);
    + ...
    +
    + // Defines additional single text based output 'text' for the job
    + MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
    + LongWritable.class, Text.class);
    +
    + // Defines additional multi sequencefile based output 'seq' for the
    + // job
    + MultipleOutputs.addMultiNamedOutput(conf, "seq",
    +   SequenceFileOutputFormat.class,
    +   LongWritable.class, Text.class);
    + ...
    +
    + JobClient jc = new JobClient();
    + RunningJob job = jc.submitJob(conf);
    +
    + ...
    + 
    +

    + Usage in a Reducer is: +

    +
    + public class MOReduce implements
    +   Reducer<WritableComparable, Writable> {
    + private MultipleOutputs mos;
    +
    + public void configure(JobConf conf) {
    + ...
    + mos = new MultipleOutputs(conf);
    + }
    +
    + public void reduce(WritableComparable key, Iterator<Writable> values,
    + OutputCollector output, Reporter reporter)
    + throws IOException {
    + ...
    + mos.getCollector("text", reporter).collect(key, new Text("Hello"));
    + mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye"));
    + mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau"));
    + ...
    + }
    +
    + public void close() throws IOException {
    + mos.close();
    + ...
    + }
    +
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used instead of the default implementation, + @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + bound in order to improve throughput. +

    + Map implementations using this MapRunnable must be thread-safe. +

    + The Map-Reduce job has to be configured to use this MapRunnable class (using + the JobConf.setMapRunnerClass method) and + the number of threads the thread-pool can use with the + mapred.map.multithreadedrunner.threads property; its default + value is 10 threads. +
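    + A minimal sketch of that configuration, assuming a job driver class named
    + MyJob (hypothetical) and the property name documented above:
    +
    + JobConf conf = new JobConf(MyJob.class);
    + // run the map() calls through a thread pool instead of the default runner
    + conf.setMapRunnerClass(MultithreadedMapRunner.class);
    + // size of the thread pool; 10 is the documented default
    + conf.setInt("mapred.map.multithreadedrunner.threads", 20);
    +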

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + pairs. Uses + {@link StringTokenizer} to break text into tokens.]]> + + + + + + + + + + + + total.order.partitioner.natural.order is not false, a trie + of the first total.order.partitioner.max.trie.depth(2) + 1 bytes + will be built. Otherwise, keys will be located using a binary search of + the partition keyset using the {@link org.apache.hadoop.io.RawComparator} + defined for this job. The input file must be sorted with the same + comparator and contain {@link + org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys.]]> + + + + + + + + + + + + R reduces, there are R-1 + keys in the SequenceFile.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + generateKeyValPairs(Object key, Object value); public void + configure(JobConfjob); } + + The package also provides a base class, ValueAggregatorBaseDescriptor, + implementing the above interface. The user can extend the base class and + implement generateKeyValPairs accordingly. + + The primary work of generateKeyValPairs is to emit one or more key/value + pairs based on the input key/value pair. The key in an output key/value pair + encode two pieces of information: aggregation type and aggregation id. The + value will be aggregated onto the aggregation id according the aggregation + type. + + This class offers a function to generate a map/reduce job using Aggregate + framework. The function takes the following parameters: input directory spec + input format (text or sequence file) output directory a file specifying the + user plugin class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The job can be configured using the static methods in this class, + {@link DBInputFormat}, and {@link DBOutputFormat}. +

    + Alternatively, the properties can be set in the configuration with proper + values. + + @see DBConfiguration#configureDB(JobConf, String, String, String, String) + @see DBInputFormat#setInput(JobConf, Class, String, String) + @see DBInputFormat#setInput(JobConf, Class, String, String, String, String...) + @see DBOutputFormat#setOutput(JobConf, String, String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 20070101 AND length > 0)' + @param orderBy the fieldNames in the orderBy clause. + @param fieldNames The field names in the table + @see #setInput(JobConf, Class, String, String)]]> + + + + + + + + + + + + + + DBInputFormat emits LongWritables containing the record number as + key and DBWritables as value. + + The SQL query, and input class can be using one of the two + setInput methods.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {@link DBOutputFormat} accepts <key,value> pairs, where + key has a type extending DBWritable. Returned {@link RecordWriter} + writes only the key to the database with a batch SQL query.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DBWritable. DBWritable, is similar to {@link Writable} + except that the {@link #write(PreparedStatement)} method takes a + {@link PreparedStatement}, and {@link #readFields(ResultSet)} + takes a {@link ResultSet}. +

    + Implementations are responsible for writing the fields of the object + to PreparedStatement, and reading the fields of the object from the + ResultSet. + +

    Example:

    + If we have the following table in the database : +
    + CREATE TABLE MyTable (
    +   counter        INTEGER NOT NULL,
    +   timestamp      BIGINT  NOT NULL,
    + );
    + 
    + then we can read/write the tuples from/to the table with : +

    + public class MyWritable implements Writable, DBWritable {
    +   // Some data     
    +   private int counter;
    +   private long timestamp;
    +       
    +   //Writable#write() implementation
    +   public void write(DataOutput out) throws IOException {
    +     out.writeInt(counter);
    +     out.writeLong(timestamp);
    +   }
    +       
    +   //Writable#readFields() implementation
    +   public void readFields(DataInput in) throws IOException {
    +     counter = in.readInt();
    +     timestamp = in.readLong();
    +   }
    +       
    +   public void write(PreparedStatement statement) throws SQLException {
    +     statement.setInt(1, counter);
    +     statement.setLong(2, timestamp);
    +   }
    +       
    +   public void readFields(ResultSet resultSet) throws SQLException {
    +     counter = resultSet.getInt(1);
    +     timestamp = resultSet.getLong(2);
    +   } 
    + }
    + 

    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When constructing the instance, if the factory property + contextName.class exists, + its value is taken to be the name of the class to instantiate. Otherwise, + the default is to create an instance of + org.apache.hadoop.metrics.spi.NullContext, which is a + dummy "no-op" context which will cause all metric data to be discarded. + + @param contextName the name of the context + @return the named MetricsContext]]> + + + + + + + + + + + + + + When the instance is constructed, this method checks if the file + hadoop-metrics.properties exists on the class path. If it + exists, it must be in the format defined by java.util.Properties, and all + the properties in the file are set as attributes on the newly created + ContextFactory instance. + + @return the singleton ContextFactory instance]]> + + + + getFactory() method.]]> + + + + + + + + + + + + + + + + + + + startMonitoring() again after calling + this. + @see #close()]]> + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A record name identifies the kind of data to be reported. For example, a + program reporting statistics relating to the disks on a computer might use + a record name "diskStats".

    + + A record has zero or more tags. A tag has a name and a value. To + continue the example, the "diskStats" record might use a tag named + "diskName" to identify a particular disk. Sometimes it is useful to have + more than one tag, so there might also be a "diskType" with value "ide" or + "scsi" or whatever.

    + + A record also has zero or more metrics. These are the named + values that are to be reported to the metrics system. In the "diskStats" + example, possible metric names would be "diskPercentFull", "diskPercentBusy", + "kbReadPerSecond", etc.

    + + The general procedure for using a MetricsRecord is to fill in its tag and + metric values, and then call update() to pass the record to the + client library. + Metric data is not immediately sent to the metrics system + each time that update() is called. + An internal table is maintained, identified by the record name. This + table has columns + corresponding to the tag and the metric names, and rows + corresponding to each unique set of tag values. An update + either modifies an existing row in the table, or adds a new row with a set of + tag values that are different from all the other rows. Note that if there + are no tags, then there can be at most one row in the table.
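    + A short sketch of that procedure, reusing the "diskStats" names from the
    + example above and assuming the org.apache.hadoop.metrics.MetricsUtil
    + helper (the metric values are illustrative):
    +
    + MetricsContext context = MetricsUtil.getContext("diskStats");
    + MetricsRecord diskStats = MetricsUtil.createRecord(context, "diskStats");
    + diskStats.setTag("diskName", "sda");
    + diskStats.setMetric("diskPercentFull", 42);
    + // hand the row to the client library; it is pushed on the timer period
    + diskStats.update();
    +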

    + + Once a row is added to the table, its data will be sent to the metrics system + on every timer period, whether or not it has been updated since the previous + timer period. If this is inappropriate, for example if metrics were being + reported by some transient object in an application, the remove() + method can be used to remove the row and thus stop the data from being + sent.

    + + Note that the update() method is atomic. This means that it is + safe for different threads to be updating the same metric. More precisely, + it is OK for different threads to call update() on MetricsRecord instances + with the same set of tag names and tag values. Different threads should + not use the same MetricsRecord instance at the same time.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MetricsContext.registerUpdater().]]> + + + + + + + + + + + + + + + + + + + + + + + + + fileName attribute, + if specified. Otherwise the data will be written to standard + output.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is configured by setting ContextFactory attributes which in turn + are usually configured through a properties file. All the attributes are + prefixed by the contextName. For example, the properties file might contain: +

    + myContextName.fileName=/tmp/metrics.log
    + myContextName.period=5
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + contextName.tableName. The returned map consists of + those attributes with the contextName and tableName stripped off.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class implements the internal table of metric data, and the timer + on which data is to be sent to the metrics system. Subclasses must + override the abstract emitRecord method in order to transmit + the data.

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + update + and remove().]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + ,name=" + Where the and are the supplied parameters + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.rpc.socket.factory.class.<ClassName>. When no + such parameter exists then fall back on the default socket factory as + configured by hadoop.rpc.socket.factory.class.default. If + this default socket factory is not configured, then fall back on the JVM + default socket factory. + + @param conf the configuration + @param clazz the class (usually a {@link VersionedProtocol}) + @return a socket factory]]> + + + + + + hadoop.rpc.socket.factory.default + + @param conf the configuration + @return the default socket factory as specified in the configuration or + the JVM default socket factory if the configuration does not + contain a default socket factory property.]]> + + + + + + + + + + + + + : + ://:/]]> + + + + + + + + : + ://:/]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + From documentation for {@link #getInputStream(Socket, long)}:
    + Returns InputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketInputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getInputStream()} is returned. In the later + case, the timeout argument is ignored and the timeout set with + {@link Socket#setSoTimeout(int)} applies for reads.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see #getInputStream(Socket, long) + + @param socket + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + +
    + + From documentation for {@link #getOutputStream(Socket, long)} :
    + Returns OutputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketOutputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getOutputStream()} is returned. In the later + case, the timeout argument is ignored and the write will wait until + data is available.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see #getOutputStream(Socket, long) + + @param socket + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return OutputStream for writing to the socket. + @throws IOException]]> +
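    + A minimal sketch of the pattern described above (socket creation and
    + connection details are elided; the 10 second write timeout is arbitrary
    + and conf is an existing Configuration):
    +
    + Socket socket = NetUtils.getDefaultSocketFactory(conf).createSocket();
    + ...
    + InputStream in = NetUtils.getInputStream(socket);
    + OutputStream out = NetUtils.getOutputStream(socket, 10000);
    +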
    +
    + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + node + + @param node + a node + @return true if node is already in the tree; false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + scope + if scope starts with ~, choose one from the all nodes except for the + ones in scope; otherwise, choose one from scope + @param scope range of nodes from which a node will be choosen + @return the choosen node]]> + + + + + + + scope but not in excludedNodes + if scope starts with ~, return the number of nodes that are not + in scope and excludedNodes; + @param scope a path string that may start with ~ + @param excludedNodes a list of nodes + @return number of available nodes]]> + + + + + + + + + + + + reader + It linearly scans the array, if a local node is found, swap it with + the first element of the array. + If a local rack node is found, swap it with the first element following + the local node. + If neither local node or local rack node is found, put a random replica + location at postion 0. + It leaves the rest nodes untouched.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout timeout in milliseconds. must not be negative. + @throws IOException]]> +
    +
    + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + +
    + + Create a new ouput stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketOutputStream#SocketOutputStream(WritableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout timeout in milliseconds. must not be negative. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = getCount(). + @param newCapacity The new capacity in bytes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index idx = startVector(...); + while (!idx.done()) { + .... // read element of a vector + idx.incr(); + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This task takes the given record definition files and compiles them into + java or c++ + files. It is then up to the user to compile the generated files. + +

    The task requires the file or the nested fileset element to be + specified. Optional attributes are language (set the output + language, default is "java"), + destdir (name of the destination directory for generated java/c++ + code, default is ".") and failonerror (specifies error handling + behavior. default is true). +

    Usage

    +
    + <recordcc
    +       destdir="${basedir}/gensrc"
    +       language="java">
    +   <fileset include="**\/*.jr" />
    + </recordcc>
    + 
    ]]> +
    +
    + +
    + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi as a comma separated string in + conf as a property attr + + The String starts with the user name followed by the default group names, + and other group names. + + @param conf configuration + @param attr property name + @param ugi a UnixUserGroupInformation]]> + + + + + + + + conf + + The object is expected to store with the property name attr + as a comma separated string that starts + with the user name followed by group names. + If the property name is not defined, return null. + It's assumed that there is only one UGI per user. If this user already + has a UGI in the ugi map, return the ugi in the map. + Otherwise, construct a UGI from the configuration, store it in the + ugi map and return it. + + @param conf configuration + @param attr property name + @return a UnixUGI + @throws LoginException if the stored string is ill-formatted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This tool supports archiving and anaylzing (sort/grep) of log-files. + It takes as input + a) Input uri which will serve uris of the logs to be archived. + b) Output directory (not mandatory). + b) Directory on dfs to archive the logs. + c) The sort/grep patterns for analyzing the files and separator for boundaries. + Usage: + Logalyzer -archive -archiveDir -analysis -logs -grep -sort -separator +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + in]]> + + + + + + + out.]]> + + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GenericOptionsParser to parse only the generic Hadoop + arguments. + + The array of string arguments other than the generic arguments can be + obtained by {@link #getRemainingArgs()}. + + @param conf the Configuration to modify. + @param args command-line arguments.]]> + + + + + GenericOptionsParser to parse given options as well + as generic Hadoop options. + + The resulting CommandLine object can be obtained by + {@link #getCommandLine()}. + + @param conf the configuration to modify + @param options options built by the caller + @param args User-specified arguments]]> + + + + + Strings containing the un-parsed arguments + or empty array if commandLine was not defined.]]> + + + + + CommandLine object + to process the parsed arguments. + + Note: If the object is created with + {@link #GenericOptionsParser(Configuration, String[])}, then returned + object will only contain parsed generic options. + + @return CommandLine representing list of arguments + parsed against Options descriptor.]]> + + + + + + + + + + + + + + + + + GenericOptionsParser is a utility to parse command line + arguments generic to the Hadoop framework. + + GenericOptionsParser recognizes several standarad command + line arguments, enabling applications to easily specify a namenode, a + jobtracker, additional configuration resources etc. + +

    Generic Options

    + +

    The supported generic options are:

    +

    +     -conf <configuration file>     specify a configuration file
    +     -D <property=value>            use value for given property
    +     -fs <local|namenode:port>      specify a namenode
    +     -jt <local|jobtracker:port>    specify a job tracker
    +     -files <comma separated list of files>    specify comma separated
    +                            files to be copied to the map reduce cluster
    +     -libjars <comma separated list of jars>   specify comma separated
    +                            jar files to include in the classpath.
    +     -archives <comma separated list of archives>    specify comma
    +             separated archives to be unarchived on the compute machines.
    +
    + 

    + +

    The general command line syntax is:

    +

    + bin/hadoop command [genericOptions] [commandOptions]
    + 

    + +

    Generic command line arguments might modify + Configuration objects, given to constructors.

    + +

    The functionality is implemented using Commons CLI.

    + +

    Examples:

    +

    + $ bin/hadoop dfs -fs darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    + 
    + $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    +     
    + $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
    + list /data directory in dfs with conf specified in hadoop-site.xml
    +     
    + $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt local -submit job.xml
    + submit a job to local runner
    + 
    + $ bin/hadoop jar -libjars testlib.jar 
    + -archives test.tgz -files file.txt inputjar args
    + job submission with libjars, files and archives
    + 

    + + @see Tool + @see ToolRunner]]> +
    +
    + + + + + + + + + Class<T>) of the + argument of type T. + @param The type of the argument + @param t the object to get it class + @return Class<T>]]> + + + + + + + List<T> to a an array of + T[]. + @param c the Class object of the items in the list + @param list the list to convert]]> + + + + + + List<T> to a an array of + T[]. + @param list the list to convert + @throws ArrayIndexOutOfBoundsException if the list is empty. + Use {@link #toArray(Class, List)} if the list may be empty.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.file.buffer.size specified in the given + Configuration. + @param in input stream + @param conf configuration + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-hadoop is loaded, + else false]]> + + + + + + true if native hadoop libraries, if present, can be + used for this job; false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { pq.top().change(); pq.adjustTop(); } + instead of
    +  { o = pq.pop(); o.change(); pq.push(o); }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take an insignificant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occured and time-out the operation.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop Pipes + or Hadoop Streaming. + + It also checks to ensure that we are running on a *nix platform else + (e.g. in Cygwin/Windows) it returns null. + @param conf configuration + @return a String[] with the ulimit command arguments or + null if we are running on a non *nix platform or + if the limit is unspecified.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell can be used to run unix commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShellCommandExecutorshould be used in cases where the output + of the command needs no explicit parsing and where the command, working + directory and the environment remains unchanged. The output of the command + is stored as-is and is expected to be small.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ArrayList of string values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the char to be escaped + @return an escaped string]]> + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the escaped char + @return an unescaped string]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyApp.MyMapper.class);
    +         job.setReducerClass(MyApp.MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
    +         JobClient.runJob(job);
    +         return 0;
    +       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
    +         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> +
    +
    + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    diff --git a/aarch64/share/hadoop/common/jdiff/hadoop_0.20.0.xml b/aarch64/share/hadoop/common/jdiff/hadoop_0.20.0.xml new file mode 100644 index 0000000..ce6f91b --- /dev/null +++ b/aarch64/share/hadoop/common/jdiff/hadoop_0.20.0.xml @@ -0,0 +1,52140 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param in InputStream to deserialize the object from.]]> + + + + + + + + + + + name property, null if + no such property exists. + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + name property, without doing + variable expansion. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + + value of the name property. + + @param name property name. + @param value property value.]]> + + + + + + + + + + + + + + name property. If no such property + exists, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, or if the specified value is not a valid + int, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as an int, + or defaultValue.]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property is specified, or if the specified value is not a valid + long, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property is specified, or if the specified value is not a valid + float, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a float, + or defaultValue.]]> + + + + + + + name property to a float. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

    + This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

      +
1. core-default.xml: Read-only defaults for hadoop.
2. core-site.xml: Site-specific configuration for a given hadoop installation.
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    +  <property>
    +    <name>dfs.client.buffer.dir</name>
    +    <value>/tmp/hadoop/dfs/client</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + core-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
1. Other properties defined in this Configuration; and, if a name is undefined here,
2. Properties in {@link System#getProperties()}.
    + +

    For example, if a configuration resource contains the following property + definitions: +

    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    + + When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name.]]> +
    +
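As an illustration of the expansion rules above, a minimal sketch using the Configuration API; the resource name my-site.xml and the expanded paths are hypothetical:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;

    public class ConfExpansionSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical site file containing the basedir/tempdir properties shown above.
        conf.addResource(new Path("my-site.xml"));

        // get() expands variables: ${basedir} resolves to another property in this
        // Configuration, ${user.name} to the System property of that name.
        String tempdir = conf.get("tempdir");   // e.g. "/user/alice/tmp"

        // getRaw() returns the stored value without any expansion.
        String raw = conf.getRaw("tempdir");    // "${basedir}/tmp"

        System.out.println(raw + " -> " + tempdir);
      }
    }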
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DistributedCache is a facility provided by the Map-Reduce + framework to cache files (text, archives, jars etc.) needed by applications. +

    + +

Applications specify the files to be cached via urls (hdfs:// or http://) + using the {@link org.apache.hadoop.mapred.JobConf}. + The DistributedCache assumes that the + files specified via hdfs:// urls are already present on the + {@link FileSystem} at the path specified by the url.

    + +

The framework will copy the necessary files onto the slave node before + any tasks for the job are executed on that node. Its efficiency stems from + the fact that the files are only copied once per job, and from the ability to + cache archives which are un-archived on the slaves.

    + +

    DistributedCache can be used to distribute simple, read-only + data/text files and/or more complex types such as archives, jars etc. + Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. + Jars may be optionally added to the classpath of the tasks, a rudimentary + software distribution mechanism. Files have execution permissions. + Optionally users can also direct it to symlink the distributed cache file(s) + into the working directory of the task.

    + +

    DistributedCache tracks modification timestamps of the cache + files. Clearly the cache files should not be modified by the application + or externally while the job is executing.

    + +

Here is an illustrative example of how to use the + DistributedCache:

    +

    +     // Setting up the cache for the application
    +     
    +     1. Copy the requisite files to the FileSystem:
    +     
    +     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
    +     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
    +     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
    +     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
    +     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
    +     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
    +     
    +     2. Setup the application's JobConf:
    +     
    +     JobConf job = new JobConf();
    +     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
    +                                   job);
+     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
    +     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
    +     
    +     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
    +     or {@link org.apache.hadoop.mapred.Reducer}:
    +     
    +     public static class MapClass extends MapReduceBase  
    +     implements Mapper<K, V, K, V> {
    +     
    +       private Path[] localArchives;
    +       private Path[] localFiles;
    +       
    +       public void configure(JobConf job) {
    +         // Get the cached archives/files
    +         localArchives = DistributedCache.getLocalCacheArchives(job);
    +         localFiles = DistributedCache.getLocalCacheFiles(job);
    +       }
    +       
    +       public void map(K key, V value, 
    +                       OutputCollector<K, V> output, Reporter reporter) 
    +       throws IOException {
    +         // Use data from the cached archives/files here
    +         // ...
    +         // ...
+         output.collect(key, value);
    +       }
    +     }
    +     
    + 

    + + @see org.apache.hadoop.mapred.JobConf + @see org.apache.hadoop.mapred.JobClient]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BufferedFSInputStream + with the specified buffer size, + and saves its argument, the input stream + in, for later use. An internal + buffer array of length size + is created and stored in buf. + + @param in the underlying input stream. + @param size the buffer size. + @exception IllegalArgumentException if size <= 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    +
    +
    +

    +

+   ?             Matches any single character.
+   *             Matches zero or more characters.
+   [abc]         Matches a single character from character set {a,b,c}.
+   [a-b]         Matches a single character from the character range {a...b}.
+                 Note that character a must be lexicographically less than or
+                 equal to character b.
+   [^a]          Matches a single character that is not from character set or
+                 range {a}. Note that the ^ character must occur immediately
+                 to the right of the opening bracket.
+   \c            Removes (escapes) any special meaning of character c.
+   {ab,cd}       Matches a string from the string set {ab, cd}.
+   {ab,c{de,fh}} Matches a string from the string set {ab, cde, cfh}.
+
+ @param pathPattern a regular expression specifying a path pattern
+ @return an array of paths that match the path pattern
+ @throws IOException]]> +
    +
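A minimal sketch of using such a pattern, assuming the FileSystem#globStatus API documented here; the output directory and file names are hypothetical:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class GlobSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());

        // Matches part-00000 through part-00009 under a hypothetical job output directory.
        FileStatus[] matches = fs.globStatus(new Path("/user/alice/output/part-0000[0-9]"));
        if (matches != null) {
          for (FileStatus status : matches) {
            System.out.println(status.getPath());
          }
        }
      }
    }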
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object. The + Hadoop DFS is a multi-machine system that appears as a single + disk. It's useful because of its fault tolerance and potentially + very large capacity. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + buf at offset + and checksum into checksum. + The method is used for implementing read, therefore, it should be optimized + for sequential reading + @param pos chunkPos + @param buf desitination buffer + @param offset offset in buf at which to store data + @param len maximun number of bytes to read + @return number of bytes read]]> + + + + + + + + + + + + + + + + + -1 if the end of the + stream is reached. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + This method implements the general contract of the corresponding + {@link InputStream#read(byte[], int, int) read} method of + the {@link InputStream} class. As an additional + convenience, it attempts to read as many bytes as possible by repeatedly + invoking the read method of the underlying stream. This + iterated read continues until one of the following + conditions becomes true:

      + +
    • The specified number of bytes have been read, + +
    • The read method of the underlying stream returns + -1, indicating end-of-file. + +
    If the first read on the underlying stream returns + -1 to indicate end-of-file then this method returns + -1. Otherwise this method returns the number of bytes + actually read. + + @param b destination buffer. + @param off offset at which to start storing bytes. + @param len maximum number of bytes to read. + @return the number of bytes read, or -1 if the end of + the stream has been reached. + @exception IOException if an I/O error occurs. + ChecksumException if any checksum error occurs]]> +
    + + + + + + + + + + + + + + + + + + n bytes of data from the + input stream. + +

    This method may skip more bytes than are remaining in the backing + file. This produces no exception and the number of bytes skipped + may include some number of bytes that were beyond the EOF of the + backing file. Attempting to read from the stream after skipping past + the end will result in -1 indicating the end of the file. + +

    If n is negative, no bytes are skipped. + + @param n the number of bytes to be skipped. + @return the actual number of bytes skipped. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to skip to is corrupted]]> + + + + + + + This method may seek past the end of the file. + This produces no exception and an attempt to read from + the stream will result in -1 indicating the end of the file. + + @param pos the postion to seek to. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to seek to is corrupted]]> + + + + + + + + + + len bytes from + stm + + @param stm an input stream + @param buf destiniation buffer + @param offset offset at which to store data + @param len number of bytes to read + @return actual number of bytes read + @throws IOException if there is any IO error]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + len bytes from the specified byte array + starting at offset off and generate a checksum for + each data chunk. + +

    This method stores bytes from the given array into this + stream's buffer before it gets checksumed. The buffer gets checksumed + and flushed to the underlying output stream when all data + in a checksum chunk are in the buffer. If the buffer is empty and + requested length is at least as large as the size of next checksum chunk + size, this method will checksum and write the chunk directly + to the underlying output stream. Thus it avoids uneccessary data copy. + + @param b the data. + @param off the start offset in the data. + @param len the number of bytes to write. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trash feature. Files are moved to a user's trash + directory, a subdirectory of their home directory named ".Trash". Files are + initially moved to a current sub-directory of the trash directory. + Within that sub-directory their original path is preserved. Periodically + one may checkpoint the current trash and remove older checkpoints. (This + design permits trash management without enumeration of the full trash + content, without date support in the filesystem, and without clock + synchronization.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a tool for migrating data from an older to a newer version + of an S3 filesystem. +

    +

    + All files in the filesystem are migrated by re-writing the block metadata + - no datafiles are touched. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + Extracts AWS credentials from the filesystem URI or configuration. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A block-based {@link FileSystem} backed by + Amazon S3. +

    + @see NativeS3FileSystem]]> +
    +
    + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If f is a file, this method will make a single call to S3. + If f is a directory, this method will make a maximum of + (n / 1000) + 2 calls to S3, where n is the total number of + files and directories contained directly in f. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} for reading and writing files stored on + Amazon S3. + Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation + stores files on S3 in their + native form so they can be read by other S3 tools. +

    + @see org.apache.hadoop.fs.s3.S3FileSystem]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . + @param name The name of the server + @param port The port to use on the server + @param findPort whether the server should start at the given port and + increment by 1 until it finds a free port. + @param conf Configuration]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + points to the log directory + "/static/" -> points to common static files (src/webapps/static) + "/" -> the jsp server code from (src/webapps/)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nth value.]]> + + + + + + + + + + + + + + + + + + + + + nth value in the file.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataInputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + DataInputBuffer buffer = new DataInputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using DataInput methods ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataOutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + DataOutputBuffer buffer = new DataOutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using DataOutput methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

+ Compared with ObjectWritable, this class is much more efficient, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), defining + the classes which will be wrapped in GenericObject in the application. + Attention: the classes defined in the getTypes() method must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
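A self-contained variant of the same idea, assuming IntWritable and Text as the wrapped types (the class name ValueWrapper is illustrative):

    import org.apache.hadoop.io.GenericWritable;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    public class ValueWrapper extends GenericWritable {
      @SuppressWarnings("unchecked")
      private static final Class<? extends Writable>[] CLASSES =
          new Class[] { IntWritable.class, Text.class };

      protected Class<? extends Writable>[] getTypes() {
        return CLASSES;
      }

      public static void main(String[] args) {
        ValueWrapper wrapper = new ValueWrapper();
        wrapper.set(new Text("hello"));       // wrap one of the declared types
        Writable unwrapped = wrapper.get();   // later, recover the wrapped instance
        System.out.println(unwrapped);
      }
    }

Because only an index into the declared type array is serialized, rather than a full class name per record, the per-record overhead stays small, which is the efficiency argument made above.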
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new InputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + InputBuffer buffer = new InputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using InputStream methods ...
    + }
    + 
    + @see DataInputBuffer + @see DataOutput]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link IOException} or + null pointers. Must only be used for cleanup in exception handlers. + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key and + val. Returns true if such a pair exists and false when at + the end of the map]]> + + + + + + + + + + + + + + + + key or if it does not exist, at the first entry + after the named key. + +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof.]]> + + + + + + + + + key does not exist, return + the first entry that falls just before the key. Otherwise, + return the record that sorts just after. + @return - the key that was the closest match or null if eof.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new OutputStream and + ByteArrayOutputStream each time data is written. + +
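A small sketch of writing and probing a map file, assuming the MapFile.Writer and MapFile.Reader constructors of this release; the directory name is hypothetical:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.MapFile;
    import org.apache.hadoop.io.Text;

    public class MapFileSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        String dir = "/tmp/example.map";   // hypothetical MapFile directory

        // Keys must be appended in sorted order; the writer builds the data and index files.
        MapFile.Writer writer =
            new MapFile.Writer(conf, fs, dir, IntWritable.class, Text.class);
        for (int i = 0; i < 100; i++) {
          writer.append(new IntWritable(i), new Text("value-" + i));
        }
        writer.close();

        // The reader loads the sparse index into memory and seeks within the data file.
        MapFile.Reader reader = new MapFile.Reader(fs, dir, conf);
        Text value = new Text();
        reader.get(new IntWritable(42), value);   // random access by key
        System.out.println(value);
        reader.close();
      }
    }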

    Typical usage is something like the following:

    +
    + OutputBuffer buffer = new OutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using OutputStream methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    + @see DataOutputBuffer + @see InputBuffer]]> +
    +
    + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link Writer}, {@link Reader} and + {@link Sorter} classes for writing, reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
1. Writer : Uncompressed records.
2. RecordCompressWriter : Record-compressed files, only compress values.
3. BlockCompressWriter : Block-compressed files, both keys & values are
   collected in 'blocks' separately and compressed. The size of the 'block'
   is configurable.
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

The recommended way is to use the static createWriter methods + provided by the SequenceFile to choose the preferred format.

    + +

    The {@link Reader} acts as the bridge and can read any of the above + SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
• version - 3 bytes of magic header SEQ, followed by 1 byte of actual version number (e.g. SEQ4 or SEQ6)
• keyClassName - key class
• valueClassName - value class
• compression - A boolean which specifies if compression is turned on for keys/values in this file.
• blockCompression - A boolean which specifies if block-compression is turned on for keys/values in this file.
• compression codec - CompressionCodec class which is used for compression of keys and/or values (if compression is enabled).
• metadata - {@link Metadata} for this file.
• sync - A sync marker to denote end of the header.
    + +
Uncompressed SequenceFile Format
• Header
• Record
  • Record length
  • Key length
  • Key
  • Value
• A sync-marker every few 100 bytes or so.

Record-Compressed SequenceFile Format
• Header
• Record
  • Record length
  • Key length
  • Key
  • Compressed Value
• A sync-marker every few 100 bytes or so.

Block-Compressed SequenceFile Format
• Header
• Record Block
  • Compressed key-lengths block-size
  • Compressed key-lengths block
  • Compressed keys block-size
  • Compressed keys block
  • Compressed value-lengths block-size
  • Compressed value-lengths block
  • Compressed values block-size
  • Compressed values block
• A sync-marker every few 100 bytes or so.

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
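A minimal sketch of the createWriter/Reader round trip described above; the file path is hypothetical and the block-compressed variant is chosen purely for illustration:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;

    public class SequenceFileSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path file = new Path("/tmp/example.seq");   // hypothetical path

        // createWriter picks the record/block layout from the requested CompressionType.
        SequenceFile.Writer writer = SequenceFile.createWriter(
            fs, conf, file, IntWritable.class, Text.class,
            SequenceFile.CompressionType.BLOCK);
        writer.append(new IntWritable(1), new Text("one"));
        writer.close();

        // The Reader detects the format (uncompressed, record- or block-compressed).
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
        IntWritable key = new IntWritable();
        Text value = new Text();
        while (reader.next(key, value)) {
          System.out.println(key + "\t" + value);
        }
        reader.close();
      }
    }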
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key, skipping its + value. True if another entry exists, and false at end of file.]]> + + + + + + + + key and + val. Returns true if such a pair exists and false when at + end of file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The position passed must be a position returned by {@link + SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + position, use {@link SequenceFile.Reader#sync(long)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SegmentDescriptor + @param segments the list of SegmentDescriptors + @param tmpDir the directory to write temporary files into + @return RawKeyValueIterator + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For best performance, applications should make sure that the {@link + Writable#readFields(DataInput)} implementation of their keys is + very efficient. In particular, it should avoid allocating memory.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This always returns a synchronized position. In other words, + immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + the key may be earlier in the file than key last written when this + method was called (e.g., with block-compression, it may be the first key + in the block that was being written when this method was called).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key. Returns + true if such a key exists and false when at the end of the set.]]> + + + + + + + key. + Returns key, or null if no match exists.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instatiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. 
+ @return byte position of the first occurence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a UTF8 with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + Also includes utilities for efficiently reading and writing UTF-8. + + @deprecated replaced by Text]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

    + + @param in DataInput to deseriablize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +

    +     public class MyWritable implements Writable {
    +       // Some data     
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 

    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Example:

    +

    +     public class MyWritableComparable implements WritableComparable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable w) {
+         int thisValue = this.counter;
+         int thatValue = w.counter;
+         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +     }
    + 

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implemenation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. 
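A minimal sketch of such an optimization for IntWritable keys, using the static helpers mentioned above (IntWritable already ships with an equivalent comparator, so this is purely illustrative):

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.WritableComparator;

    // Compares the serialized bytes directly instead of deserializing both operands.
    public class RawIntComparator extends WritableComparator {
      public RawIntComparator() {
        super(IntWritable.class);
      }

      public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        int left = readInt(b1, s1);    // WritableComparator.readInt is one of the
        int right = readInt(b2, s2);   // static utility methods mentioned above
        return (left < right) ? -1 : (left == right ? 0 : 1);
      }
    }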
+ @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FIXME: This array should be in a private or package private location, + since it could be modified by malicious code. +

    ]]> +
    + + + + This interface is public for historical purposes. You should have no need to + use it. +

    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Although BZip2 headers are marked with the magic "Bz" this + constructor expects the next byte in the stream to be the first one after + the magic. Thus callers have to skip the first two bytes. Otherwise this + constructor will throw an exception. +

    + + @throws IOException + if the stream content is malformed or an I/O error occurs. + @throws NullPointerException + if in == null]]> +
    +
    + + + + + + + + + + + + + + + The decompression requires large amounts of memory. Thus you should call the + {@link #close() close()} method as soon as possible, to force + CBZip2InputStream to release the allocated memory. See + {@link CBZip2OutputStream CBZip2OutputStream} for information about memory + usage. +

    + +

+ CBZip2InputStream reads bytes from the compressed source stream via + the single byte {@link java.io.InputStream#read() read()} method exclusively. + Thus you should consider using a buffered source stream. +

    + +

    + Instances of this class are not threadsafe. +

    ]]> +
    +
    + + + + + + + + CBZip2OutputStream with a blocksize of 900k. + +

+ Attention: The caller is responsible for writing the two BZip2 magic + bytes "BZ" to the specified stream prior to calling this + constructor. +

    + + @param out * + the destination stream. + + @throws IOException + if an I/O error occurs in the specified stream. + @throws NullPointerException + if out == null.]]> +
    +
    + + + + CBZip2OutputStream with specified blocksize. + +

+ Attention: The caller is responsible for writing the two BZip2 magic + bytes "BZ" to the specified stream prior to calling this + constructor. +

    + + + @param out + the destination stream. + @param blockSize + the blockSize as 100k units. + + @throws IOException + if an I/O error occurs in the specified stream. + @throws IllegalArgumentException + if (blockSize < 1) || (blockSize > 9). + @throws NullPointerException + if out == null. + + @see #MIN_BLOCKSIZE + @see #MAX_BLOCKSIZE]]> +
    +
    + + + + + + + + + + + + + inputLength this method returns MAX_BLOCKSIZE + always. + + @param inputLength + The length of the data which will be compressed by + CBZip2OutputStream.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + == 1.]]> + + + + + == 9.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If you are ever unlucky/improbable enough to get a stack overflow whilst + sorting, increase the following constant and try again. In practice I + have never seen the stack go above 27 elems, so the following limit seems + very generous. +

    ]]> +
    +
    + + + The compression requires large amounts of memory. Thus you should call the + {@link #close() close()} method as soon as possible, to force + CBZip2OutputStream to release the allocated memory. +

    + +

    + You can shrink the amount of allocated memory and maybe raise the compression + speed by choosing a lower blocksize, which in turn may cause a lower + compression ratio. You can avoid unnecessary memory allocation by avoiding + using a blocksize which is bigger than the size of the input. +

    + +

    + You can compute the memory usage for compressing by the following formula: +

    + +
    + <code>400k + (9 * blocksize)</code>.
    + 
    + +

    + To get the memory required for decompression by {@link CBZip2InputStream + CBZip2InputStream} use +

    + +
    + <code>65k + (5 * blocksize)</code>.
    + 
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Memory usage by blocksize

  Blocksize   Compression memory usage   Decompression memory usage
  100k        1300k                       565k
  200k        2200k                      1065k
  300k        3100k                      1565k
  400k        4000k                      2065k
  500k        4900k                      2565k
  600k        5800k                      3065k
  700k        6700k                      3565k
  800k        7600k                      4065k
  900k        8500k                      4565k
    + +

    + For decompression CBZip2InputStream allocates less memory if the + bzipped input is smaller than one block. +

    + +

    + Instances of this class are not threadsafe. +

    + +

    + TODO: Update to BZip2 1.0.1 +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-zlib is loaded & initialized + and can be loaded for this job, else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Keep trying a limited number of times, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
    + + + + + + + Keep trying for a maximum time, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
    + + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime mutliplied by the number of tries so far. +

    ]]> +
    +
    + + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime mutliplied by a random + number in the range of [0, 2 to the number of retries) +

    ]]> +
    +
    + + + + + + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + + + A retry policy for RemoteException + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + Try once, and fail by re-throwing the exception. + This corresponds to having no retry mechanism in place. +

    ]]> +
    +
    + + + + Try once, and fail silently for void methods, or by + re-throwing the exception for non-void methods. +

    ]]> +
    +
    + + + + Keep trying forever. +

    ]]> +
    +
    + + + A collection of useful implementations of {@link RetryPolicy}. +

    ]]> +
    +
    + + + + + + + + + + Determines whether the framework should retry a + method for the given exception, and the number + of retries that have been made for that operation + so far. +

    + @param e The exception that caused the method to fail. + @param retries The number of times the method has been retried. + @return true if the method should be retried, + false if the method should not be retried + but shouldn't fail with an exception (only for void methods). + @throws Exception The re-thrown exception e indicating + that the method failed and should not be retried further.]]> +
    +
    + + + Specifies a policy for retrying method failures. + Implementations of this interface should be immutable. +

    ]]> +
    +
    + + + + + + + + + + + + Create a proxy for an interface of an implementation class + using the same retry policy for each method in the interface. +

+ @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param retryPolicy the policy for retrying method call failures + @return the retry proxy]]> +
    +
+ + + + + + + Create a proxy for an interface of an implementation class + using a set of retry policies specified by method name. + If no retry policy is defined for a method then a default of + {@link RetryPolicies#TRY_ONCE_THEN_FAIL} is used. +

    + @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param methodNameToPolicyMap a map of method names to retry policies + @return the retry proxy]]> +
    +
    + + + A factory for creating retry proxies. +

    ]]> +
    +
    + +
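+ As a sketch of how the retry policies and the proxy factory described above are typically combined (assuming the org.apache.hadoop.io.retry classes RetryPolicies and RetryProxy carry the factory methods documented here; the MyService interface is purely illustrative):
+
+     import java.io.IOException;
+     import java.util.concurrent.TimeUnit;
+     import org.apache.hadoop.io.retry.RetryPolicies;
+     import org.apache.hadoop.io.retry.RetryPolicy;
+     import org.apache.hadoop.io.retry.RetryProxy;
+
+     // Hypothetical application interface whose calls should be retried.
+     interface MyService {
+       String fetch(String key) throws IOException;
+     }
+
+     class RetryExample {
+       static MyService wrap(MyService impl) {
+         // "Keep trying a limited number of times, waiting a fixed time between attempts"
+         RetryPolicy policy =
+             RetryPolicies.retryUpToMaximumCountWithFixedSleep(5, 2, TimeUnit.SECONDS);
+         return (MyService) RetryProxy.create(MyService.class, impl, policy);
+       }
+     }
+
+ Calls made through the returned proxy are re-attempted according to the policy and only re-throw once the policy gives up.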
    + + + + + + + + Prepare the deserializer for reading.

    ]]> +
    +
    + + + + + + Deserialize the next object from the underlying input stream. + If the object t is non-null then this deserializer + may set its internal state to the next object read from the input + stream. Otherwise, if the object t is null a new + deserialized object will be created. +

    + @return the deserialized object]]> +
    +
    + + + + Close the underlying input stream and clear up any resources.

    ]]> +
    +
+ + + Provides a facility for deserializing objects of type <T> from an + {@link InputStream}. +

    + +

    + Deserializers are stateful, but must not buffer the input since + other producers may read from the input between calls to + {@link #deserialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link Deserializer} to deserialize + the objects to be compared so that the standard {@link Comparator} can + be used to compare them. +

    +

    + One may optimize compare-intensive operations by using a custom + implementation of {@link RawComparator} that operates directly + on byte representations. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + Encapsulates a {@link Serializer}/{@link Deserializer} pair. +

    + @param ]]> +
    +
    + + + + + + + Serializations are found by reading the io.serializations + property from conf, which is a comma-delimited list of + classnames. +

    ]]> +
    +
    + + + + + + + + + + + + A factory for {@link Serialization}s. +

    ]]> +
    +
    + + + + + + + + Prepare the serializer for writing.

    ]]> +
    +
    + + + + + Serialize t to the underlying output stream.

    ]]> +
    +
    + + + + Close the underlying output stream and clear up any resources.

    ]]> +
    +
+ + + Provides a facility for serializing objects of type <T> to an + {@link OutputStream}. +

    + +

    + Serializers are stateful, but must not buffer the output since + other producers may write to the output between calls to + {@link #serialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + +
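+ A minimal round trip through the serialization framework described above might look as follows (a sketch assuming the org.apache.hadoop.io.serializer API as documented; Text is used simply because a Writable serialization is registered by default through io.serializations):
+
+     import java.io.ByteArrayInputStream;
+     import java.io.ByteArrayOutputStream;
+     import java.io.IOException;
+     import org.apache.hadoop.conf.Configuration;
+     import org.apache.hadoop.io.Text;
+     import org.apache.hadoop.io.serializer.Deserializer;
+     import org.apache.hadoop.io.serializer.SerializationFactory;
+     import org.apache.hadoop.io.serializer.Serializer;
+
+     public class SerializationRoundTrip {
+       public static void main(String[] args) throws IOException {
+         Configuration conf = new Configuration();   // io.serializations selects the implementations
+         SerializationFactory factory = new SerializationFactory(conf);
+
+         ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+         Serializer<Text> ser = factory.getSerializer(Text.class);
+         ser.open(bytes);                             // prepare the serializer for writing
+         ser.serialize(new Text("hello"));
+         ser.close();
+
+         Deserializer<Text> deser = factory.getDeserializer(Text.class);
+         deser.open(new ByteArrayInputStream(bytes.toByteArray()));
+         Text copy = deser.deserialize(null);         // null: a new object is created
+         deser.close();
+         System.out.println(copy);
+       }
+     }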
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + param, to the IPC server running at + address, returning the value. Throws exceptions if there are + network problems or if the remote code threw an exception. + @deprecated Use {@link #call(Writable, InetSocketAddress, Class, UserGroupInformation)} instead]]> + + + + + + + + + + param, to the IPC server running at + address with the ticket credentials, returning + the value. + Throws exceptions if there are network problems or if the remote code + threw an exception. + @deprecated Use {@link #call(Writable, InetSocketAddress, Class, UserGroupInformation)} instead]]> + + + + + + + + + + + param, to the IPC server running at + address which is servicing the protocol protocol, + with the ticket credentials, returning the value. + Throws exceptions if there are network problems or if the remote code + threw an exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unwraps any IOException. + + @param lookupTypes the desired exception class. + @return IOException, which is either the lookupClass exception or this.]]> + + + + + This unwraps any Throwable that has a constructor taking + a String as a parameter. + Otherwise it returns this. + + @return Throwable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + protocol is a Java interface. All parameters and return types must + be one of: + +
    • a primitive type, boolean, byte, + char, short, int, long, + float, double, or void; or
    • + +
    • a {@link String}; or
    • + +
    • a {@link Writable}; or
    • + +
    • an array of the above types
    + + All methods in the protocol should throw only IOException. No field data of + the protocol instance is transmitted.]]> +
    +
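+ A sketch of a protocol interface that follows the rules above (all names here are illustrative; extending VersionedProtocol is how the classic Writable-based org.apache.hadoop.ipc.RPC layer recognizes protocols):
+
+     import java.io.IOException;
+     import org.apache.hadoop.io.Text;
+     import org.apache.hadoop.ipc.VersionedProtocol;
+
+     // Parameters and return types are limited to primitives, String, Writable,
+     // and arrays of those; every method declares only IOException.
+     public interface InventoryProtocol extends VersionedProtocol {
+       long VERSION_ID = 1L;
+
+       long getCount(String item) throws IOException;
+       Text[] listItems() throws IOException;
+       void addItem(Text item, int quantity) throws IOException;
+     }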
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + handlerCount determines + the number of handler threads that will be used to process calls.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + ,name=RpcActivityForPort" + + Many of the activity metrics are sampled and averaged on an interval + which can be specified in the metrics config file. +

+ For the metrics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most metrics contexts do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-metrics.properties file: +

    +        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        rpc.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically + + + + Impl details: We use a dynamic mbean that gets the list of the metrics + from the metrics registry passed as an argument to the constructor]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #rpcQueueTime}.inc(time)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        rpc.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When constructing the instance, if the factory property + contextName.class exists, + its value is taken to be the name of the class to instantiate. Otherwise, + the default is to create an instance of + org.apache.hadoop.metrics.spi.NullContext, which is a + dummy "no-op" context which will cause all metric data to be discarded. + + @param contextName the name of the context + @return the named MetricsContext]]> + + + + + + + + + + + + + + + + + + + + + When the instance is constructed, this method checks if the file + hadoop-metrics.properties exists on the class path. If it + exists, it must be in the format defined by java.util.Properties, and all + the properties in the file are set as attributes on the newly created + ContextFactory instance. + + @return the singleton ContextFactory instance]]> + + + + getFactory() method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + startMonitoring() again after calling + this. + @see #close()]]> + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A record name identifies the kind of data to be reported. For example, a + program reporting statistics relating to the disks on a computer might use + a record name "diskStats".

    + + A record has zero or more tags. A tag has a name and a value. To + continue the example, the "diskStats" record might use a tag named + "diskName" to identify a particular disk. Sometimes it is useful to have + more than one tag, so there might also be a "diskType" with value "ide" or + "scsi" or whatever.

    + + A record also has zero or more metrics. These are the named + values that are to be reported to the metrics system. In the "diskStats" + example, possible metric names would be "diskPercentFull", "diskPercentBusy", + "kbReadPerSecond", etc.

    + + The general procedure for using a MetricsRecord is to fill in its tag and + metric values, and then call update() to pass the record to the + client library. + Metric data is not immediately sent to the metrics system + each time that update() is called. + An internal table is maintained, identified by the record name. This + table has columns + corresponding to the tag and the metric names, and rows + corresponding to each unique set of tag values. An update + either modifies an existing row in the table, or adds a new row with a set of + tag values that are different from all the other rows. Note that if there + are no tags, then there can be at most one row in the table.

    + + Once a row is added to the table, its data will be sent to the metrics system + on every timer period, whether or not it has been updated since the previous + timer period. If this is inappropriate, for example if metrics were being + reported by some transient object in an application, the remove() + method can be used to remove the row and thus stop the data from being + sent.

    + + Note that the update() method is atomic. This means that it is + safe for different threads to be updating the same metric. More precisely, + it is OK for different threads to call update() on MetricsRecord instances + with the same set of tag names and tag values. Different threads should + not use the same MetricsRecord instance at the same time.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MetricsContext.registerUpdater().]]> + + + + + + + + + + + + + + + + + + + + + + + + + fileName attribute, + if specified. Otherwise the data will be written to standard + output.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is configured by setting ContextFactory attributes which in turn + are usually configured through a properties file. All the attributes are + prefixed by the contextName. For example, the properties file might contain: +

    + myContextName.fileName=/tmp/metrics.log
    + myContextName.period=5
    + 
    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + contextName.tableName. The returned map consists of + those attributes with the contextName and tableName stripped off.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class implements the internal table of metric data, and the timer + on which data is to be sent to the metrics system. Subclasses must + override the abstract emitRecord method in order to transmit + the data.

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + update + and remove().]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + ,name=" + Where the and are the supplied parameters + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.rpc.socket.factory.class.<ClassName>. When no + such parameter exists then fall back on the default socket factory as + configured by hadoop.rpc.socket.factory.class.default. If + this default socket factory is not configured, then fall back on the JVM + default socket factory. + + @param conf the configuration + @param clazz the class (usually a {@link VersionedProtocol}) + @return a socket factory]]> + + + + + + hadoop.rpc.socket.factory.default + + @param conf the configuration + @return the default socket factory as specified in the configuration or + the JVM default socket factory if the configuration does not + contain a default socket factory property.]]> + + + + + + + + + + + + + : + ://:/]]> + + + + + + + + : + ://:/]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + From documentation for {@link #getInputStream(Socket, long)}:
+ Returns InputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketInputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getInputStream()} is returned. In the latter + case, the timeout argument is ignored and the timeout set with + {@link Socket#setSoTimeout(int)} applies for reads.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see #getInputStream(Socket, long) + + @param socket + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + +
    + + From documentation for {@link #getOutputStream(Socket, long)} :
+ Returns OutputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketOutputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getOutputStream()} is returned. In the latter + case, the timeout argument is ignored and the write will wait until + data is available.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see #getOutputStream(Socket, long) + + @param socket + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    + + + + + + + socket.connect(endpoint, timeout). If + socket.getChannel() returns a non-null channel, + connect is implemented using Hadoop's selectors. This is done mainly + to avoid Sun's connect implementation from creating thread-local + selectors, since Hadoop does not have control on when these are closed + and could end up taking all the available file descriptors. + + @see java.net.Socket#connect(java.net.SocketAddress, int) + + @param socket + @param endpoint + @param timeout - timeout in milliseconds]]> + + + + + + + + + + + + + + +
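+ Put together, the helpers above are used roughly like this (a sketch; host, port and timeout values are arbitrary examples):
+
+     import java.io.InputStream;
+     import java.io.OutputStream;
+     import java.net.InetSocketAddress;
+     import java.net.Socket;
+     import org.apache.hadoop.net.NetUtils;
+
+     public class SocketExample {
+       public static void main(String[] args) throws Exception {
+         Socket socket = new Socket();
+         // connect with an explicit timeout, via Hadoop's selector-based path
+         NetUtils.connect(socket, new InetSocketAddress("localhost", 8020), 15000);
+
+         InputStream in   = NetUtils.getInputStream(socket, 30000);   // 30s read timeout
+         OutputStream out = NetUtils.getOutputStream(socket, 30000);  // 30s write timeout
+         out.write(new byte[] { 1, 2, 3 });
+         out.flush();
+         int firstByte = in.read();
+         socket.close();
+       }
+     }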
    + + + + + + + + + + + + + + + + + + + + + node + + @param node + a node + @return true if node is already in the tree; false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + scope + if scope starts with ~, choose one from the all nodes except for the + ones in scope; otherwise, choose one from scope + @param scope range of nodes from which a node will be choosen + @return the choosen node]]> + + + + + + + scope but not in excludedNodes + if scope starts with ~, return the number of nodes that are not + in scope and excludedNodes; + @param scope a path string that may start with ~ + @param excludedNodes a list of nodes + @return number of available nodes]]> + + + + + + + + + + + + reader + It linearly scans the array, if a local node is found, swap it with + the first element of the array. + If a local rack node is found, swap it with the first element following + the local node. + If neither local node or local rack node is found, put a random replica + location at postion 0. + It leaves the rest nodes untouched.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout in milliseconds. must not be negative. + @throws IOException]]> +
    +
    + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + +
+ + Create a new output stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketOutputStream#SocketOutputStream(WritableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout in milliseconds. must not be negative. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = getCount(). + @param newCapacity The new capacity in bytes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index idx = startVector(...); + while (!idx.done()) { + .... // read element of a vector + idx.incr(); + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This task takes the given record definition files and compiles them into + java or c++ + files. It is then up to the user to compile the generated files. + +

    The task requires the file or the nested fileset element to be + specified. Optional attributes are language (set the output + language, default is "java"), + destdir (name of the destination directory for generated java/c++ + code, default is ".") and failonerror (specifies error handling + behavior. default is true). +

    Usage

    +
    + <recordcc
    +       destdir="${basedir}/gensrc"
    +       language="java">
    +   <fileset include="**\/*.jr" />
    + </recordcc>
    + 
    ]]> +
    +
    + +
    + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + Group with the given groupname. + @param group group name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi. + @param ugi user + @return the {@link Subject} for the user identified by ugi]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi as a comma separated string in + conf as a property attr + + The String starts with the user name followed by the default group names, + and other group names. + + @param conf configuration + @param attr property name + @param ugi a UnixUserGroupInformation]]> + + + + + + + + conf + + The object is expected to store with the property name attr + as a comma separated string that starts + with the user name followed by group names. + If the property name is not defined, return null. + It's assumed that there is only one UGI per user. If this user already + has a UGI in the ugi map, return the ugi in the map. + Otherwise, construct a UGI from the configuration, store it in the + ugi map and return it. + + @param conf configuration + @param attr property name + @return a UnixUGI + @throws LoginException if the stored string is ill-formatted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + User with the given username. + @param user user name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). 
+ @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + does not provide the stack trace for security purposes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + service as related to + Service Level Authorization for Hadoop. + + Each service defines it's configuration key and also the necessary + {@link Permission} required to access the service.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + in]]> + + + + + + + out.]]> + + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GenericOptionsParser to parse only the generic Hadoop + arguments. + + The array of string arguments other than the generic arguments can be + obtained by {@link #getRemainingArgs()}. + + @param conf the Configuration to modify. + @param args command-line arguments.]]> + + + + + GenericOptionsParser to parse given options as well + as generic Hadoop options. + + The resulting CommandLine object can be obtained by + {@link #getCommandLine()}. + + @param conf the configuration to modify + @param options options built by the caller + @param args User-specified arguments]]> + + + + + Strings containing the un-parsed arguments + or empty array if commandLine was not defined.]]> + + + + + + + + + + CommandLine object + to process the parsed arguments. + + Note: If the object is created with + {@link #GenericOptionsParser(Configuration, String[])}, then returned + object will only contain parsed generic options. + + @return CommandLine representing list of arguments + parsed against Options descriptor.]]> + + + + + + + + + + + + + + + + + GenericOptionsParser is a utility to parse command line + arguments generic to the Hadoop framework. + + GenericOptionsParser recognizes several standarad command + line arguments, enabling applications to easily specify a namenode, a + jobtracker, additional configuration resources etc. + +

    Generic Options

    + +

    The supported generic options are:

    +

    +     -conf <configuration file>     specify a configuration file
    +     -D <property=value>            use value for given property
    +     -fs <local|namenode:port>      specify a namenode
    +     -jt <local|jobtracker:port>    specify a job tracker
    +     -files <comma separated list of files>    specify comma separated
    +                            files to be copied to the map reduce cluster
    +     -libjars <comma separated list of jars>   specify comma separated
    +                            jar files to include in the classpath.
    +     -archives <comma separated list of archives>    specify comma
    +             separated archives to be unarchived on the compute machines.
    +
    + 

    + +

    The general command line syntax is:

    +

    + bin/hadoop command [genericOptions] [commandOptions]
    + 

    + +

    Generic command line arguments might modify + Configuration objects, given to constructors.

    + +

    The functionality is implemented using Commons CLI.

    + +

    Examples:

    +

    + $ bin/hadoop dfs -fs darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    + 
    + $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    +     
    + $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
    + list /data directory in dfs with conf specified in hadoop-site.xml
    +     
    + $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt local -submit job.xml
    + submit a job to local runner
    + 
    + $ bin/hadoop jar -libjars testlib.jar 
    + -archives test.tgz -files file.txt inputjar args
    + job submission with libjars, files and archives
    + 

    + + @see Tool + @see ToolRunner]]> +
    +
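+ In code, separating the generic options from the application's own arguments looks roughly like this (the argument values are illustrative):
+
+     import org.apache.hadoop.conf.Configuration;
+     import org.apache.hadoop.util.GenericOptionsParser;
+
+     public class ParseArgs {
+       public static void main(String[] args) throws Exception {
+         Configuration conf = new Configuration();
+         // e.g. args = { "-D", "mapred.reduce.tasks=4", "/in", "/out" }
+         GenericOptionsParser parser = new GenericOptionsParser(conf, args);
+         String[] appArgs = parser.getRemainingArgs();   // { "/in", "/out" }
+
+         System.out.println("reduces  = " + conf.get("mapred.reduce.tasks"));
+         System.out.println("app args = " + appArgs.length);
+       }
+     }
+
+ Applications that implement Tool get this behaviour for free through ToolRunner, as shown further below.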
    + + + + + + + + + Class<T>) of the + argument of type T. + @param The type of the argument + @param t the object to get it class + @return Class<T>]]> + + + + + + + List<T> to a an array of + T[]. + @param c the Class object of the items in the list + @param list the list to convert]]> + + + + + + List<T> to a an array of + T[]. + @param list the list to convert + @throws ArrayIndexOutOfBoundsException if the list is empty. + Use {@link #toArray(Class, List)} if the list may be empty.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.file.buffer.size specified in the given + Configuration. + @param in input stream + @param conf configuration + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-hadoop is loaded, + else false]]> + + + + + + true if native hadoop libraries, if present, can be + used for this job; false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { pq.top().change(); pq.adjustTop(); } + instead of
    +  { o = pq.pop(); o.change(); pq.push(o); }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take an insignificant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occured and time-out the operation.

    ]]> +
    +
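+ A sketch of the intended usage (the copy loop is hypothetical; only the progress() call matters here):
+
+     import org.apache.hadoop.util.Progressable;
+
+     public class SlowCopy {
+       static void copyChunks(int chunks, Progressable progress) {
+         for (int i = 0; i < chunks; i++) {
+           // ... copy one chunk (omitted) ...
+           progress.progress();   // tell the framework the operation is still alive
+         }
+       }
+     }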
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop Pipes + or Hadoop Streaming. + + It also checks to ensure that we are running on a *nix platform else + (e.g. in Cygwin/Windows) it returns null. + @param conf configuration + @return a String[] with the ulimit command arguments or + null if we are running on a non *nix platform or + if the limit is unspecified.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell can be used to run unix commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShellCommandExecutorshould be used in cases where the output + of the command needs no explicit parsing and where the command, working + directory and the environment remains unchanged. The output of the command + is stored as-is and is expected to be small.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ArrayList of string values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the char to be escaped + @return an escaped string]]> + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the escaped char + @return an unescaped string]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyApp.MyMapper.class);
    +         job.setReducerClass(MyApp.MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
+         JobClient.runJob(job);
+         return 0;
+       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
+         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> +
    +
    + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bloom filter, as defined by Bloom in 1970. +

    + The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + the networking research community in the past decade thanks to the bandwidth efficiencies that it + offers for the transmission of set membership information between networked hosts. A sender encodes + the information into a bit vector, the Bloom filter, that is more compact than a conventional + representation. Computation and space costs for construction are linear in the number of elements. + The receiver uses the filter to test whether various elements are members of the set. Though the + filter will occasionally return a false positive, it will never return a false negative. When creating + the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]> + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this counting Bloom filter. +

    + Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + @param key The key to remove.]]> + + + + + + + + + + + + key -> count map. +

    NOTE: due to the bucket size of this filter, inserting the same + key more than 15 times will cause an overflow at all filter positions + associated with this key, and it will significantly increase the error + rate for this and other keys. For this reason the filter can only be + used to store small count values 0 <= N << 15. + @param key key to be tested + @return 0 if the key is not present. Otherwise, a positive value v will + be returned such that v == count with probability equal to the + error rate of this filter, and v > count otherwise. + Additionally, if the filter experienced an underflow as a result of + {@link #delete(Key)} operation, the return value may be lower than the + count with the probability of the false negative rate of such + filter.]]> + + + + + + + + + + + + + + + + + + + + + + counting Bloom filter, as defined by Fan et al. in a ToN + 2000 paper. +

+ A counting Bloom filter is an improvement to a standard Bloom filter as it + allows dynamic additions and deletions of set membership information. This + is achieved through the use of a counting vector instead of a bit vector. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Summary cache: a scalable wide-area web cache sharing protocol]]> + + + + + + + + + + + + + + Builds an empty Dynamic Bloom filter. + @param vectorSize The number of bits in the vector. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}). + @param nr The threshold for the maximum number of keys to record in a + dynamic Bloom filter row.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + dynamic Bloom filter, as defined in the INFOCOM 2006 paper. +

    + A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but + each of the s rows is a standard Bloom filter. The creation + process of a DBF is iterative. At the start, the DBF is a 1 * m + bit matrix, i.e., it is composed of a single standard Bloom filter. + It assumes that nr elements are recorded in the + initial bit vector, where nr <= n (n is + the cardinality of the set A to record in the filter). +

    + As the size of A grows during the execution of the application, + several keys must be inserted in the DBF. When inserting a key into the DBF, + one must first get an active Bloom filter in the matrix. A Bloom filter is + active when the number of recorded keys, nr, is + strictly less than the current cardinality of A, n. + If an active Bloom filter is found, the key is inserted and + nr is incremented by one. On the other hand, if there + is no active Bloom filter, a new one is created (i.e., a new row is added to + the matrix) according to the current size of A and the element + is added in this new Bloom filter and the nr value of + this new Bloom filter is set to one. A given key is said to belong to the + DBF if the k positions are set to one in one of the matrix rows. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + + @see Theory and Network Applications of Dynamic Bloom Filters]]> + + + + + + + + + + + this filter. + @param nbHash The number of hash functions to consider. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + + this filter. + @param key The key to add.]]> + + + + + + this filter. + @param key The key to test. + @return boolean True if the specified key belongs to this filter. + False otherwise.]]> + + + + + + this filter and a specified filter. +

    + Invariant: The result is assigned to this filter. + @param filter The filter to AND with.]]> + + + + + + this filter and a specified filter. +

    + Invariant: The result is assigned to this filter. + @param filter The filter to OR with.]]> + + + + + + this filter and a specified filter. +

    + Invariant: The result is assigned to this filter. + @param filter The filter to XOR with.]]> + + + + + this filter. +

    + The result is assigned to this filter.]]> + + + + + + this filter. + @param keys The list of keys.]]> + + + + + + this filter. + @param keys The collection of keys.]]> + + + + + + this filter. + @param keys The array of keys.]]> + + + + + + + + + + + + + this filter.]]> + + + + + + + + + + + + + + + + + + + + A filter is a data structure which aims at offering a lossy summary of a set A. The + key idea is to map entries of A (also called keys) into several positions + in a vector through the use of several hash functions. +

    + Typically, a filter will be implemented as a Bloom filter (or a Bloom filter extension). +

    + It must be extended in order to define the real behavior. + + @see Key The general behavior of a key + @see HashFunction A hash function]]> + + + + + + + + + Builds a hash function that must obey to a given maximum number of returned values and a highest value. + @param maxValue The maximum highest returned value. + @param nbHash The number of resulting hashed values. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + this hash function. A NOOP]]> + + + + + + + + + + + + + + + + + + + + + + + + + Builds a key with a default weight. + @param value The byte value of this key.]]> + + + + + + Builds a key with a specified weight. + @param value The value of this key. + @param weight The weight associated to this key.]]> + + + + + + + + + + + + this key.]]> + + + + + this key.]]> + + + + + + this key with a specified value. + @param weight The increment.]]> + + + + + this key by one.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The idea is to randomly select a bit to reset.]]> + + + + + + The idea is to select the bit to reset that will generate the minimum + number of false negative.]]> + + + + + + The idea is to select the bit to reset that will remove the maximum number + of false positive.]]> + + + + + + The idea is to select the bit to reset that will, at the same time, remove + the maximum number of false positve while minimizing the amount of false + negative generated.]]> + + + + + Originally created by + European Commission One-Lab Project 034819.]]> + + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this retouched Bloom filter. +

    + Invariant: if the false positive is null, nothing happens. + @param key The false positive key to add.]]> + + + + + + this retouched Bloom filter. + @param coll The collection of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The list of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The array of false positive.]]> + + + + + + + this retouched Bloom filter. + @param scheme The selective clearing scheme to apply.]]> + + + + + + + + + + + + retouched Bloom filter, as defined in the CoNEXT 2006 paper. +

    + It allows the removal of selected false positives at the cost of introducing + random false negatives, and with the benefit of eliminating some random false + positives at the same time. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + @see RemoveScheme The different selective clearing algorithms + + @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + length, and + the provided seed value + @param bytes input bytes + @param length length of the valid bytes to consider + @param initval seed value + @return hash value]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The best hash table sizes are powers of 2. There is no need to do mod + a prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask. + For example, if you need only 10 bits, do + h = (h & hashmask(10)); + In which case, the hash table should have hashsize(10) elements. + +

    If you are hashing n strings byte[][] k, do it like this: + for (int i = 0, h = 0; i < n; ++i) h = hash( k[i], h); + +

    By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this + code any way you wish, private, educational, or commercial. It's free. + +

    Use for hash table lookup, or anything where one collision in 2^^32 is + acceptable. Do NOT use for cryptographic purposes.]]> + + + + + + + + + + + lookup3.c, by Bob Jenkins, May 2006, Public Domain. + + You can use this free for any purpose. It's in the public domain. + It has no warranty. + + + @see lookup3.c + @see Hash Functions (and how this + function compares to others such as CRC, MD?, etc + @see Has update on the + Dr. Dobbs Article]]> + + + + + + + + + + + + + + + + The C version of MurmurHash 2.0 found at that site was ported + to Java by Andrzej Bialecki (ab at getopt org).

    ]]> +
    +
    + +
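+ Basic use of the filter classes documented above looks like this (a sketch; the sizing constants are arbitrary example values, not recommendations):
+
+     import org.apache.hadoop.util.bloom.BloomFilter;
+     import org.apache.hadoop.util.bloom.Key;
+     import org.apache.hadoop.util.hash.Hash;
+
+     public class BloomExample {
+       public static void main(String[] args) {
+         // vector size in bits, number of hash functions, hash type
+         BloomFilter filter = new BloomFilter(8 * 1024, 4, Hash.MURMUR_HASH);
+
+         filter.add(new Key("alpha".getBytes()));
+         filter.add(new Key("beta".getBytes()));
+
+         // never a false negative; occasionally a false positive
+         System.out.println(filter.membershipTest(new Key("alpha".getBytes())));   // true
+         System.out.println(filter.membershipTest(new Key("gamma".getBytes())));   // almost certainly false
+       }
+     }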
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker, + as {@link JobTracker.State} + + @return the current state of the JobTracker.]]> + + + + + JobTracker + + @return the size of heap memory used by the JobTracker]]> + + + + + JobTracker + + @return the configured size of max heap memory that can be used by the JobTracker]]> + + + + + + + + + + + + ClusterStatus provides clients with information such as: +
      +
+ 1. Size of the cluster.
+ 2. Name of the trackers.
+ 3. Task capacity of the cluster.
+ 4. The number of currently running map & reduce tasks.
+ 5. State of the JobTracker.

    + +

    Clients can query for the latest ClusterStatus, via + {@link JobClient#getClusterStatus()}.

    + + @see JobClient]]> +
    +
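+ A sketch of such a query with the classic client API (assuming JobClient and ClusterStatus behave as documented above):
+
+     import org.apache.hadoop.mapred.ClusterStatus;
+     import org.apache.hadoop.mapred.JobClient;
+     import org.apache.hadoop.mapred.JobConf;
+
+     public class ClusterInfo {
+       public static void main(String[] args) throws Exception {
+         JobClient client = new JobClient(new JobConf());
+         ClusterStatus status = client.getClusterStatus();
+
+         System.out.println("task trackers   = " + status.getTaskTrackers());
+         System.out.println("max map slots   = " + status.getMaxMapTasks());
+         System.out.println("running maps    = " + status.getMapTasks());
+         System.out.println("running reduces = " + status.getReduceTasks());
+       }
+     }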
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter can be of + any {@link Enum} type.

    + +

    Counters are bunched into {@link Group}s, each comprising of + counters from a particular Enum class. + @deprecated Use {@link org.apache.hadoop.mapreduce.Counters} instead.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Group of counters, comprising of counters from a particular + counter {@link Enum} class. + +

Group handles localization of the class name and the + counter names.

    ]]> +
    +
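+ A sketch of an enum-backed counter in the classic mapred API (the enum, the mapper and the condition are illustrative; the Reporter call is the point):
+
+     import java.io.IOException;
+     import org.apache.hadoop.io.LongWritable;
+     import org.apache.hadoop.io.Text;
+     import org.apache.hadoop.mapred.MapReduceBase;
+     import org.apache.hadoop.mapred.Mapper;
+     import org.apache.hadoop.mapred.OutputCollector;
+     import org.apache.hadoop.mapred.Reporter;
+
+     public class CountingMapper extends MapReduceBase
+         implements Mapper<LongWritable, Text, Text, LongWritable> {
+
+       public enum MyCounters { MALFORMED_RECORDS }
+
+       public void map(LongWritable key, Text value,
+                       OutputCollector<Text, LongWritable> out, Reporter reporter)
+           throws IOException {
+         if (value.toString().isEmpty()) {
+           reporter.incrCounter(MyCounters.MALFORMED_RECORDS, 1);   // bump the counter
+           return;
+         }
+         out.collect(value, new LongWritable(1));
+       }
+     }
+
+ After the job finishes, the aggregated value can be read back from the job's Counters using the same enum constant.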
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param fs the file system that the file is on + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobConf, int)}. + Subclasses of FileInputFormat can also override the + {@link #isSplitable(FileSystem, Path)} method to ensure input-files are + not split-up and are processed as a whole by {@link Mapper}s. + @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat} + instead.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

Note: The following is valid only if the {@link OutputCommitter} + is {@link FileOutputCommitter}. If OutputCommitter is not + a FileOutputCommitter, the task's temporary output + directory is the same as {@link #getOutputPath(JobConf)} i.e. + ${mapred.output.dir}

    + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapred.output.dir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapred.output.dir}/_temporary/_${taskid} (only) + are promoted to ${mapred.output.dir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

    The application-writer can take advantage of this by creating any + side-files required in ${mapred.work.output.dir} during execution + of a reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the + framework will move them out similarly - thus the application doesn't have to pick + unique paths per task-attempt.

    + +

    Note: the value of ${mapred.work.output.dir} during + execution of a particular task-attempt is actually + ${mapred.output.dir}/_temporary/_{$taskid}, and this value is + set by the map-reduce framework. So, just create any side-files in the + path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce + task to take advantage of this feature.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
    +
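    + 
    + A minimal sketch of creating a side-file from within a task, assuming job is the task's JobConf; the file name "side-data" is purely illustrative:
    + 
    +     Path workDir = FileOutputFormat.getWorkOutputPath(job);
    +     FileSystem fs = workDir.getFileSystem(job);
    +     // Files created here are promoted to ${mapred.output.dir} only if the
    +     // task-attempt succeeds; the framework discards them otherwise.
    +     FSDataOutputStream side = fs.create(new Path(workDir, "side-data"));
    +     // ... write the side output ...
    +     side.close();
    + 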
    + + + + + + + + + + + + + The generated name can be used to create custom files from within the + different tasks for the job, the names for different tasks will not collide + with each other.

    + +

    The given name is postfixed with the task type, 'm' for maps, 'r' for + reduces and the task partition number. For example, given the name 'test' + running on the first map of the job, the generated name will be + 'test-m-00000'.

    + + @param conf the configuration for the job. + @param name the name to make unique. + @return a unique name across all tasks of the job.]]> +
    +
    + + + + + The path can be used to create custom files from within the map and + reduce tasks. The path name will be unique for each task. The path parent + will be the job output directory.

    + +

    This method uses the {@link #getUniqueName} method to make the file name + unique for the task.

    + + @param conf the configuration for the job. + @param name the name for the file. + @return a unique path across all tasks of the job.]]> +
    +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

    Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For example, a split could + be an <input-file-path, start, offset> tuple. + + @param job job configuration. + @param numSplits the desired number of splits, a hint. + @return an array of {@link InputSplit}s for the job.]]> +

    + + @param split the {@link InputSplit} + @param job the job that this split belongs to + @return a {@link RecordReader}]]> +
    +
    + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
    +   1. Validate the input-specification of the job.
    +   2. Split-up the input file(s) into logical {@link InputSplit}s, each of
    +      which is then assigned to an individual {@link Mapper}.
    +   3. Provide the {@link RecordReader} implementation to be used to glean
    +      input records from the logical InputSplit for processing by
    +      the {@link Mapper}.
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

    Clearly, logical splits based on input-size are insufficient for many + applications since record boundaries are to be respected. In such cases, the + application has to also implement a {@link RecordReader} on whom lies the + responsibility to respect record-boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see JobClient + @see FileInputFormat + @deprecated Use {@link org.apache.hadoop.mapreduce.InputFormat} instead.]]> + + + + + + + + + + InputSplit. + + @return the number of bytes in the input split. + @throws IOException]]> + + + + + + InputSplit is + located as an array of Strings. + @throws IOException]]> + + + + InputSplit represents the data to be processed by an + individual {@link Mapper}. + +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader + @deprecated Use {@link org.apache.hadoop.mapreduce.InputSplit} instead.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + jobid doesn't correspond to any known job. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient is the primary interface for the user-job to interact + with the {@link JobTracker}. + + JobClient provides facilities to submit jobs, track their + progress, access component-tasks' reports/logs, get the Map-Reduce cluster + status information etc. + +

    The job submission process involves: +

      +
    +   1. Checking the input and output specifications of the job.
    +   2. Computing the {@link InputSplit}s for the job.
    +   3. Setup the requisite accounting information for the
    +      {@link DistributedCache} of the job, if necessary.
    +   4. Copying the job's jar and configuration to the map-reduce system
    +      directory on the distributed file-system.
    +   5. Submitting the job to the JobTracker and optionally monitoring
    +      its status.

    + + Normally the user creates the application, describes various facets of the + job via {@link JobConf} and then uses the JobClient to submit + the job and monitor its progress. + +

    Here is an example on how to use JobClient:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     job.setInputPath(new Path("in"));
    +     job.setOutputPath(new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +
    +     // Submit the job, then poll for progress until the job is complete
    +     JobClient.runJob(job);
    + 

    + +

    Job Control

    + +

    At times clients would chain map-reduce jobs to accomplish complex tasks + which cannot be done via a single map-reduce job. This is fairly easy since + the output of the job, typically, goes to distributed file-system and that + can be used as the input for the next job.

    + +

    However, this also means that the onus on ensuring jobs are complete + (success/failure) lies squarely on the clients. In such situations the + various job-control options are: +

      +
    +   1. {@link #runJob(JobConf)} : submits the job and returns only after
    +      the job has completed.
    +   2. {@link #submitJob(JobConf)} : only submits the job, then poll the
    +      returned handle to the {@link RunningJob} to query status and make
    +      scheduling decisions.
    +   3. {@link JobConf#setJobEndNotificationURI(String)} : setup a notification
    +      on job-completion, thus avoiding polling.

    + + @see JobConf + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If the parameter {@code loadDefaults} is false, the new instance + will not load resources from the default files. + + @param loadDefaults specifies whether to load from the default files]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if framework should keep the intermediate files + for failed tasks, false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the outputs of the maps are to be compressed, + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This comparator should be provided if the equivalence rules for keys + for sorting the intermediates are different from those for grouping keys + before each call to + {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.

    + +

    For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed + in a single call to the reduce function if K1 and K2 compare as equal.

    + +

    Since {@link #setOutputKeyComparatorClass(Class)} can be used to control + how keys are sorted, this can be used in conjunction to simulate + secondary sort on values.

    + +

    Note: This is not a guarantee of the reduce sort being + stable in any sense. (In any case, with the order of available + map-outputs to the reduce being non-deterministic, it wouldn't make + that much sense.)

    + + @param theClass the comparator class to be used for grouping keys. + It should implement RawComparator. + @see #setOutputKeyComparatorClass(Class)]]> +
    +
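    + 
    + A minimal sketch of wiring this up, assuming FullKeyComparator and GroupingComparator are application-supplied RawComparator implementations and MyJob is the application's job class:
    + 
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     // Full comparison drives the sort order of intermediate keys
    +     job.setOutputKeyComparatorClass(FullKeyComparator.class);
    +     // Coarser comparison groups keys for each reduce() call
    +     job.setOutputValueGroupingComparator(GroupingComparator.class);
    + 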
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. Typically the combiner is same as the + the {@link Reducer} for the job i.e. {@link #getReducerClass()}. + + @return the user-defined combiner class used to combine map-outputs.]]> + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. + +

    The combiner is an application-specified aggregation operation, which + can help cut down the amount of data transferred between the + {@link Mapper} and the {@link Reducer}, leading to better performance.

    + +

    The framework may invoke the combiner 0, 1, or multiple times, in both + the mapper and reducer tasks. In general, the combiner is called as the + sort/merge result is written to disk. The combiner must: +

      +
    +   • be side-effect free
    +   • have the same input and output key types and the same input and
    +     output value types

    + +

    Typically the combiner is the same as the Reducer for the + job i.e. {@link #setReducerClass(Class)}.

    + + @param theClass the user-defined combiner class used to combine + map-outputs.]]> +
    +
    + + + true. + + @return true if speculative execution be used for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on, else false.]]> + + + + + true. + + @return true if speculative execution be + used for this job for map tasks, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for map tasks, + else false.]]> + + + + + true. + + @return true if speculative execution be used + for reduce tasks for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for reduce tasks, + else false.]]> + + + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + Note: This is only a hint to the framework. The actual + number of spawned map tasks depends on the number of {@link InputSplit}s + generated by the job's {@link InputFormat#getSplits(JobConf, int)}. + + A custom {@link InputFormat} is typically used to accurately control + the number of map tasks for the job.

    + +

    How many maps?

    + +

    The number of maps is usually driven by the total size of the inputs + i.e. total number of blocks of the input files.

    + +

    The right level of parallelism for maps seems to be around 10-100 maps + per-node, although it has been set up to 300 or so for very cpu-light map + tasks. Task setup takes a while, so it is best if the maps take at least a + minute to execute.

    + +

    The default behavior of file-based {@link InputFormat}s is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of input files. However, the {@link FileSystem} blocksize of the + input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

    Thus, if you expect 10TB of input data and have a blocksize of 128MB, + you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is + used to set it even higher.

    + + @param n the number of map tasks for this job. + @see InputFormat#getSplits(JobConf, int) + @see FileInputFormat + @see FileSystem#getDefaultBlockSize() + @see FileStatus#getBlockSize()]]> +
    +
    + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + How many reduces? + +

    The right number of reduces seems to be 0.95 or + 1.75 multiplied by (<no. of nodes> * + + mapred.tasktracker.reduce.tasks.maximum). +

    + +

    With 0.95 all of the reduces can launch immediately and + start transferring map outputs as the maps finish. With 1.75 + the faster nodes will finish their first round of reduces and launch a + second wave of reduces doing a much better job of load balancing.

    + +

    Increasing the number of reduces increases the framework overhead, but + increases load balancing and lowers the cost of failures.

    + +

    The scaling factors above are slightly less than whole numbers to + reserve a few reduce slots in the framework for speculative-tasks, failures + etc.

    + +

    Reducer NONE

    + +

    It is legal to set the number of reduce-tasks to zero.

    + +

    In this case the output of the map-tasks goes directly to the distributed + file-system, to the path set by + {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the + framework doesn't sort the map-outputs before writing them out to HDFS.

    + + @param n the number of reduce tasks for this job.]]> +
    +
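    + 
    + An illustrative calculation using the 0.95 factor above, with assumed values for the node count and per-node reduce slots:
    + 
    +     int nodes = 10;              // assumed cluster size
    +     int reduceSlotsPerNode = 2;  // assumed mapred.tasktracker.reduce.tasks.maximum
    +     job.setNumReduceTasks((int) (0.95 * nodes * reduceSlotsPerNode));  // 19 reduces
    + 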
    + + + mapred.map.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per map task.]]> + + + + + + + + + + + mapred.reduce.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per reduce task.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + noFailures, the + tasktracker is blacklisted for this job. + + @param noFailures maximum no. of failures of a given job per tasktracker.]]> + + + + + blacklisted for this job. + + @return the maximum no. of failures of a given job per tasktracker.]]> + + + + + failed. + + Defaults to zero, i.e. any failed map-task results in + the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + failed. + + Defaults to zero, i.e. any failed reduce-task results + in the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The debug script can aid debugging of failed map tasks. The script is + given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the map failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setMapDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param mDbgScript the script name]]> +
    +
    + + + + + + + + + The debug script can aid debugging of failed reduce tasks. The script + is given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the reduce failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script file needs to be symlinked

    + +

    Here is an example on how to submit a script +

    + job.setReduceDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param rDbgScript the script name]]> +
    +
    + + + + + + + + null if it hasn't + been set. + @see #setJobEndNotificationURI(String)]]> + + + + + + The uri can contain 2 special parameters: $jobId and + $jobStatus. Those, if present, are replaced by the job's + identifier and completion-status respectively.

    + +

    This is typically used by application-writers to implement chaining of + Map-Reduce jobs in an asynchronous manner.

    + + @param uri the job end notification uri + @see JobStatus + @see Job Completion and Chaining]]> +
    +
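    + 
    + A minimal sketch, with a hypothetical host, port and handler path:
    + 
    +     job.setJobEndNotificationURI(
    +         "http://myserver:8080/notify?jobid=$jobId&status=$jobStatus");
    + 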
    + + + + When a job starts, a shared directory is created at location + + ${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ . + This directory is exposed to the users through + job.local.dir . + So, the tasks can use this space + as scratch space and share files among them.

    + This value is available as System property also. + + @return The localized job specific shared directory]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If a job doesn't specify its virtual memory requirement by setting + {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to {@link #DISABLED_MEMORY_LIMIT}, + tasks are assured a memory limit set to this property. This property is + disabled by default, and if not explicitly set to a valid value by the + administrators and if a job doesn't specify its virtual memory + requirements, the job's tasks will not be assured anything and may be + killed by a TT that intends to control the total memory usage of the tasks + via memory management functionality. + +

    + + This value should in general be less than the cluster-wide configuration + {@link #UPPER_LIMIT_ON_TASK_VMEM_PROPERTY} . If not or if it not set, + TaskTracker's memory management may be disabled and a scheduler's memory + based scheduling decisions will be affected. Please refer to the + documentation of the configured scheduler to see how this property is used.]]> + + + + + + + This value will be used by TaskTrackers for monitoring the memory usage of + tasks of this jobs. If a TaskTracker's memory management functionality is + enabled, each task of this job will be allowed to use a maximum virtual + memory specified by this property. If the task's memory usage goes over + this value, the task will be failed by the TT. If not set, the cluster-wide + configuration {@link #MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY} is used as the + default value for memory requirements. If this property cascaded with + {@link #MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY} becomes equal to -1, job's + tasks will not be assured anything and may be killed by a TT that intends + to control the total memory usage of the tasks via memory management + functionality. If the memory management functionality is disabled on a TT, + this value is ignored. + +

    + + This value should also be not more than the cluster-wide configuration + {@link #UPPER_LIMIT_ON_TASK_VMEM_PROPERTY} which has to be set by the site + administrators. + +

    + + This value may be used by schedulers that support scheduling based on job's + memory requirements. In general, a task of this job will be scheduled on a + TaskTracker only if the amount of virtual memory still unoccupied on the + TaskTracker is greater than or equal to this value. But different + schedulers can take different decisions. Please refer to the documentation + of the scheduler being configured to see if it does memory based scheduling + and if it does, how this property is used by that scheduler. + + @see #setMaxVirtualMemoryForTask(long) + @see #getMaxVirtualMemoryForTask()]]> + + + + + + + This value may be used by schedulers that support scheduling based on job's + memory requirements. In general, a task of this job will be scheduled on a + TaskTracker, only if the amount of physical memory still unoccupied on the + TaskTracker is greater than or equal to this value. But different + schedulers can take different decisions. Please refer to the documentation + of the scheduler being configured to see how it does memory based + scheduling and how this variable is used by that scheduler. + + @see #setMaxPhysicalMemoryForTask(long) + @see #getMaxPhysicalMemoryForTask()]]> + + + + + + + If it is not set on a TaskTracker, TaskTracker's memory management will be + disabled.]]> + + + + JobConf is the primary interface for a user to describe a + map-reduce job to the Hadoop framework for execution. The framework tries to + faithfully execute the job as-is described by JobConf, however: +

      +
    +   1. Some configuration parameters might have been marked as
    +      final by administrators and hence cannot be altered.
    +   2. While some job parameters are straight-forward to set
    +      (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly
    +      with the rest of the framework and/or job-configuration and are
    +      relatively more complex for the user to control finely
    +      (e.g. {@link #setNumMapTasks(int)}).

    + +

    JobConf typically specifies the {@link Mapper}, combiner + (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and + {@link OutputFormat} implementations to be used etc. + +

    Optionally JobConf is used to specify other advanced facets + of the job such as Comparators to be used, files to be put in + the {@link DistributedCache}, whether or not intermediate and/or job outputs + are to be compressed (and how), debuggability via user-provided scripts + ( {@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}), + for doing post-processing on task logs, task's stdout, stderr, syslog, + etc.

    + +

    Here is an example on how to configure a job via JobConf:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     FileInputFormat.setInputPaths(job, new Path("in"));
    +     FileOutputFormat.setOutputPath(job, new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setCombinerClass(MyJob.MyReducer.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +     
    +     job.setInputFormat(SequenceFileInputFormat.class);
    +     job.setOutputFormat(SequenceFileOutputFormat.class);
    + 

    + + @see JobClient + @see ClusterStatus + @see Tool + @see DistributedCache + @deprecated Use {@link Configuration} instead]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + any job + run on the jobtracker started at 200707121733, we would use : +
     
    + JobID.getTaskIDsPattern("200707121733", null);
    + 
    + which will return : +
     "job_200707121733_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @return a regex pattern matching JobIDs]]> +
    +
    + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "N/A" + + @return Scheduling information associated to particular Job Queue]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + zero. + + @param conf configuration for the JobTracker. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Output pairs need not be of the same types as input pairs. A given + input pair may map to zero or many output pairs. Output pairs are + collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the input key. + @param value the input value. + @param output collects mapped keys and values. + @param reporter facility to report progress.]]> +
    + + + Maps are the individual tasks which transform input records into a + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link JobConf} for the + job via the {@link JobConfigurable#configure(JobConf)} and initialize + themselves. Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    The framework then calls + {@link #map(Object, Object, OutputCollector, Reporter)} + for each key/value pair in the InputSplit for that task.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the grouping by specifying + a Comparator via + {@link JobConf#setOutputKeyComparatorClass(Class)}.

    + +

    The grouped Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    The intermediate, grouped outputs are always stored in + {@link SequenceFile}s. Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the JobConf.

    + +

    If the job has + zero + reduces then the output of the Mapper is directly written + to the {@link FileSystem} without grouping by keys.

    + +

    Example:

    +

    +     public class MyMapper<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Mapper<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +       
    +       private String mapTaskId;
    +       private String inputFile;
    +       private int noRecords = 0;
    +       
    +       public void configure(JobConf job) {
    +         mapTaskId = job.get("mapred.task.id");
    +         inputFile = job.get("map.input.file");
    +       }
    +       
    +       public void map(K key, V val,
    +                       OutputCollector<K, V> output, Reporter reporter)
    +       throws IOException {
    +         // Process the <key, value> pair (assume this takes a while)
    +         // ...
    +         // ...
    +         
    +         // Let the framework know that we are alive, and kicking!
    +         // reporter.progress();
    +         
    +         // Process some more
    +         // ...
    +         // ...
    +         
    +         // Increment the no. of <key, value> pairs processed
    +         ++noRecords;
    +
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +        
    +         // Every 100 records update application-level status
    +         if ((noRecords%100) == 0) {
    +           reporter.setStatus(mapTaskId + " processed " + noRecords + 
    +                              " from input-file: " + inputFile); 
    +         }
    +         
    +         // Output the result
    +         output.collect(key, val);
    +       }
    +     }
    + 

    + +

    Applications may write a custom {@link MapRunnable} to exert greater + control on map processing e.g. multi-threaded Mappers etc.

    + + @see JobConf + @see InputFormat + @see Partitioner + @see Reducer + @see MapReduceBase + @see MapRunnable + @see SequenceFile + @deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + Provides default no-op implementations for a few methods, most non-trivial + applications need to override some of them.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + <key, value> pairs. + +

    Mapping of input records to output records is complete when this method + returns.

    + + @param input the {@link RecordReader} to read the input records. + @param output the {@link OutputCollector} to collect the output records. + @param reporter {@link Reporter} to report progress, status-updates etc. + @throws IOException]]> +
    +
    + + Custom implementations of MapRunnable can exert greater + control on map processing e.g. multi-threaded, asynchronous mappers etc.

    + + @see Mapper + @deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nearly + equal content length.
    + Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} + to construct RecordReader's for MultiFileSplit's. + @see MultiFileSplit + @deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} instead]]> +
    +
    + + + + + + + + + + + + + MultiFileSplit can be used to implement {@link RecordReader}'s, with + reading one record per file. + @see FileSplit + @see MultiFileInputFormat + @deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileSplit} instead]]> + + + + + + + + + + + + + + + <key, value> pairs output by {@link Mapper}s + and {@link Reducer}s. + +

    OutputCollector is the generalization of the facility + provided by the Map-Reduce framework to collect data output by either the + Mapper or the Reducer i.e. intermediate outputs + or the output of the job.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OutputCommitter describes the commit of task output for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputCommitter of + the job to:

    +

      +
    +   1. Setup the job during initialization. For example, create the temporary
    +      output directory for the job during the initialization of the job.
    +   2. Cleanup the job after the job completion. For example, remove the
    +      temporary output directory after the job completion.
    +   3. Setup the task temporary output.
    +   4. Check whether a task needs a commit. This is to avoid the commit
    +      procedure if a task does not need commit.
    +   5. Commit of the task output.
    +   6. Discard the task commit.
    + + @see FileOutputCommitter + @see JobContext + @see TaskAttemptContext + @deprecated Use {@link org.apache.hadoop.mapreduce.OutputCommitter} instead.]]> +
    +
    + + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when the + job is submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + @param ignored + @param job job configuration. + @throws IOException when output should not be attempted]]> +
    +
    + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
    +   1. Validate the output-specification of the job. For example, check that
    +      the output directory doesn't already exist.
    +   2. Provide the {@link RecordWriter} implementation to be used to write out
    +      the output files of the job. Output files are stored in a
    +      {@link FileSystem}.
    + + @see RecordWriter + @see JobConf + @deprecated Use {@link org.apache.hadoop.mapreduce.OutputFormat} instead.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + Typically a hash function on all or a subset of the key.

    + + @param key the key to be partitioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + + @see Reducer + @deprecated Use {@link org.apache.hadoop.mapreduce.Partitioner} instead.]]> +
    +
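    + 
    + A minimal sketch of a Partitioner that hashes the whole key; the class name is hypothetical and Text is org.apache.hadoop.io.Text:
    + 
    +     public class HashKeyPartitioner implements Partitioner<Text, Text> {
    +       public void configure(JobConf job) { }
    +       public int getPartition(Text key, Text value, int numPartitions) {
    +         // Mask off the sign bit so the result is always non-negative
    +         return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    +       }
    +     }
    + 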
    + + + + + + + + + + + + + + + + + + + true if there exists a key/value, + false otherwise. + @throws IOException]]> + + + + + + + + + + + + + + + RawKeyValueIterator is an iterator used to iterate over + the raw keys and values during sort/merge of intermediate data.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.0 to 1.0. + @throws IOException]]> + + + + RecordReader reads <key, value> pairs from an + {@link InputSplit}. + +

    RecordReader, typically, converts the byte-oriented view of + the input, provided by the InputSplit, and presents a + record-oriented view for the {@link Mapper} & {@link Reducer} tasks for + processing. It thus assumes the responsibility of processing record + boundaries and presenting the tasks with keys and values.

    + + @see InputSplit + @see InputFormat]]> +
    +
    + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param reporter facility to report progress. + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + Reduces values for a given key. + +

    The framework calls this method for each + <key, (list of values)> pair in the grouped inputs. + Output values must be of the same type as input values. Input keys must + not be altered. The framework will reuse the key and value objects + that are passed into the reduce, therefore the application should clone + the objects they want to keep a copy of. In many cases, all values are + combined into zero or one value. +

    + +

    Output pairs are collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the key. + @param values the list of values to reduce. + @param output to collect keys and combined values. + @param reporter facility to report progress.]]> +
    + + + The number of Reducers for the job is set by the user via + {@link JobConf#setNumReduceTasks(int)}. Reducer implementations + can access the {@link JobConf} for the job via the + {@link JobConfigurable#configure(JobConf)} method and initialize themselves. + Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    Reducer has 3 primary phases:

    +
    +   1. Shuffle
    + 
    +      Reducer is input the grouped output of a {@link Mapper}.
    +      In this phase the framework, for each Reducer, fetches the
    +      relevant partition of the output of all the Mappers, via HTTP.
    + 
    +   2. Sort
    + 
    +      The framework groups Reducer inputs by keys
    +      (since different Mappers may have output the same key) in this
    +      stage.
    + 
    +      The shuffle and sort phases occur simultaneously i.e. while outputs are
    +      being fetched they are merged.
    + 
    +      SecondarySort
    + 
    +      If equivalence rules for keys while grouping the intermediates are
    +      different from those for grouping keys before reduction, then one may
    +      specify a Comparator via
    +      {@link JobConf#setOutputValueGroupingComparator(Class)}. Since
    +      {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to
    +      control how intermediate keys are grouped, these can be used in
    +      conjunction to simulate secondary sort on values.
    + 
    +      For example, say that you want to find duplicate web pages and tag them
    +      all with the url of the "best" known example. You would set up the job
    +      like:
    +        • Map Input Key: url
    +        • Map Input Value: document
    +        • Map Output Key: document checksum, url pagerank
    +        • Map Output Value: url
    +        • Partitioner: by checksum
    +        • OutputKeyComparator: by checksum and then decreasing pagerank
    +        • OutputValueGroupingComparator: by checksum
    + 
    +   3. Reduce
    + 
    +      In this phase the
    +      {@link #reduce(Object, Iterator, OutputCollector, Reporter)}
    +      method is called for each <key, (list of values)> pair in
    +      the grouped inputs.
    + 
    +      The output of the reduce task is typically written to the
    +      {@link FileSystem} via
    +      {@link OutputCollector#collect(Object, Object)}.
    + +

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

    +     public class MyReducer<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Reducer<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +        
    +       private String reduceTaskId;
    +       private int noKeys = 0;
    +       
    +       public void configure(JobConf job) {
    +         reduceTaskId = job.get("mapred.task.id");
    +       }
    +       
    +       public void reduce(K key, Iterator<V> values,
    +                          OutputCollector<K, V> output, 
    +                          Reporter reporter)
    +       throws IOException {
    +       
    +         // Process
    +         int noValues = 0;
    +         while (values.hasNext()) {
    +           V value = values.next();
    +           
    +           // Increment the no. of values for this key
    +           ++noValues;
    +           
    +           // Process the <key, value> pair (assume this takes a while)
    +           // ...
    +           // ...
    +           
    +           // Let the framework know that we are alive, and kicking!
    +           if ((noValues%10) == 0) {
    +             reporter.progress();
    +           }
    +         
    +           // Process some more
    +           // ...
    +           // ...
    +           
    +           // Output the <key, value> 
    +           output.collect(key, value);
    +         }
    +         
    +         // Increment the no. of <key, list of values> pairs processed
    +         ++noKeys;
    +         
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +         
    +         // Every 100 keys update application-level status
    +         if ((noKeys%100) == 0) {
    +           reporter.setStatus(reduceTaskId + " processed " + noKeys);
    +         }
    +       }
    +     }
    + 

    + + @see Mapper + @see Partitioner + @see Reporter + @see MapReduceBase + @deprecated Use {@link org.apache.hadoop.mapreduce.Reducer} instead.]]> +
    +
    + + + + + + + + + + + + + + Counter of the given group/name.]]> + + + + + + + Counter of the given group/name.]]> + + + + + + + Enum. + @param amount A non-negative amount by which the counter is to + be incremented.]]> + + + + + + + + + + + + + + InputSplit that the map is reading from. + @throws UnsupportedOperationException if called outside a mapper]]> + + + + + + + + + {@link Mapper} and {@link Reducer} can use the Reporter + provided to report progress or just indicate that they are alive. In + scenarios where the application takes an insignificant amount of time to + process individual key/value pairs, this is crucial since the framework + might assume that the task has timed-out and kill that task. + +

    Applications can also update {@link Counters} via the provided + Reporter .

    + + @see Progressable + @see Counters]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + progress of the job's cleanup-tasks, as a float between 0.0 + and 1.0. When all cleanup tasks have completed, the function returns 1.0. + + @return the progress of the job's cleanup-tasks. + @throws IOException]]> + + + + + + progress of the job's setup-tasks, as a float between 0.0 + and 1.0. When all setup tasks have completed, the function returns 1.0. + + @return the progress of the job's setup-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RunningJob is the user-interface to query for details on a + running Map-Reduce job. + +

    Clients can get hold of RunningJob via the {@link JobClient} + and then query the running-job for details such as name, configuration, + progress etc.

    + + @see JobClient]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This allows the user to specify the key class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + This allows the user to specify the value class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output value class.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f. The filtering criteria is + MD5(key) % f == 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + f using + the criteria record# % f == 0. + For example, if the frequency is 10, one out of 10 records is returned.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}. + false otherwise.]]> + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}. + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop provides an optional mode of execution in which the bad records + are detected and skipped in further attempts. + +

    This feature can be used when map/reduce tasks crash deterministically on + certain input. This happens due to bugs in the map/reduce function. The usual + course would be to fix these bugs. But sometimes this is not possible; + perhaps the bug is in third party libraries for which the source code is + not available. Due to this, the task never reaches completion even with + multiple attempts and the complete data for that task is lost.

    + +

    With this feature, only a small portion of data is lost surrounding + the bad record, which may be acceptable for some user applications; + see {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}

    + +

    The skipping mode gets kicked off after a certain number of failures; + see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}

    + +

    In the skipping mode, the map/reduce task maintains the record range which + is getting processed at all times. Before giving the input to the + map/reduce function, it sends this record range to the Task tracker. + If the task crashes, the Task tracker knows which one was the last reported + range. On further attempts that range gets skipped.

    ]]> +
    +
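    + 
    + A minimal configuration sketch using the methods referenced above, assuming conf is the job's JobConf; the numeric values are arbitrary:
    + 
    +     // Enter skipping mode after 2 failed attempts of the same task
    +     SkipBadRecords.setAttemptsToStartSkipping(conf, 2);
    +     // Accept losing at most 1 record around each bad input record in the map
    +     SkipBadRecords.setMapperMaxSkipRecords(conf, 1L);
    + 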
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all task attempt IDs + of any jobtracker, in any job, of the first + map task, we would use : +
     
    + TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
    + 
    + which will return : +
     "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @param attemptId the task attempt number, or null + @return a regex pattern matching TaskAttemptIDs]]> +
    +
    + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the first map task + of any jobtracker, of any job, we would use : +

     
    + TaskID.getTaskIDsPattern(null, null, true, 1);
    + 
    + which will return : +
     "task_[^_]*_[0-9]*_m_000001*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @return a regex pattern matching TaskIDs]]> +
    + + + + + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.log.dir.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the Job was added.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([,]*) + func ::= tbl(,"") + class ::= @see java.lang.Class#forName(java.lang.String) + path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) + } + Reads expression from the mapred.join.expr property and + user-supplied join types from mapred.join.define.<ident> + types. Paths supplied to tbl are given as input paths to the + InputFormat class listed. + @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,

    ) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + mapred.join.define.<ident> to a classname. In the expression + mapred.join.expr, the identifier will be assumed to be a + ComposableRecordReader. + mapred.join.keycomparator can be a classname used to compare keys + in the join. + @see JoinRecordReader + @see MultiFilterRecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + capacity children to position + id in the parent reader. + The id of a root CompositeRecordReader is -1 by convention, but relying + on this is not recommended.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + override(S1,S2,S3) will prefer values + from S3 over S2, and values from S2 over S1 for all keys + emitted from all sources.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [,,...,]]]> + + + + + + + out. + TupleWritable format: + {@code + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

+ For the added Mapper, the configuration given for it, mapperConf, has precedence over the job's JobConf. This precedence is in effect when the task is running. +

+ IMPORTANT: There is no need to specify the output key/value classes for the ChainMapper; this is done by the addMapper call for the last mapper in the chain. +

    + + @param job job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overriden super.configure(...) should be + invoked at the beginning of the overwriter method.]]> + + + + + + + + + + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overriden super.close() should be + invoked at the end of the overwriter method.]]> + + + + + The Mapper classes are invoked in a chained (or piped) fashion, the output of + the first becomes the input of the second, and so on until the last Mapper, + the output of the last Mapper will be written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed in a chain. This enables having + reusable specialized Mappers that can be combined to perform composite + operations within a single task. +

+ Special care has to be taken when creating chains so that the key/values output by a Mapper are valid for the following Mapper in the chain. It is assumed that all Mappers and the Reducer in the chain use matching output and input key and value classes, as no conversion is done by the chaining code. +

+ Using the ChainMapper and the ChainReducer classes it is possible to compose Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An immediate benefit of this pattern is a dramatic reduction in disk IO. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain. +

    + ChainMapper usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    + 

    + JobConf mapAConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, + Text.class, Text.class, true, mapAConf); +

    + JobConf mapBConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, mapBConf); +

    + JobConf reduceConf = new JobConf(false); + ... + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, + Text.class, Text.class, true, reduceConf); +

    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, null); +

    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, + LongWritable.class, LongWritable.class, true, null); +

    + FileInputFormat.setInputPaths(conf, inDir); + FileOutputFormat.setOutputPath(conf, outDir); + ... +

    + JobClient jc = new JobClient(conf); + RunningJob job = jc.submitJob(conf); + ... +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Reducer leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Reducer does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

+ For the added Reducer, the configuration given for it, reducerConf, has precedence over the job's JobConf. This precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. + + @param job job's JobConf to add the Reducer class. + @param klass the Reducer class to add. + @param inputKeyClass reducer input key class. + @param inputValueClass reducer input value class. + @param outputKeyClass reducer output key class. + @param outputValueClass reducer output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param reducerConf a JobConf with the configuration for the Reducer + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

+ For the added Mapper, the configuration given for it, mapperConf, has precedence over the job's JobConf. This precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain + . + + @param job chain job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overriden super.configure(...) should be + invoked at the beginning of the overwriter method.]]> + + + + + + + + + + reduce(...) method of the Reducer with the + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overriden super.close() should be + invoked at the end of the overwriter method.]]> + + + + + For each record output by the Reducer, the Mapper classes are invoked in a + chained (or piped) fashion, the output of the first becomes the input of the + second, and so on until the last Mapper, the output of the last Mapper will + be written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed after the Reducer or in a chain. + This enables having reusable specialized Mappers that can be combined to + perform composite operations within a single task. +

+ Special care has to be taken when creating chains so that the key/values output by a Mapper are valid for the following Mapper in the chain. It is assumed that all Mappers and the Reducer in the chain use matching output and input key and value classes, as no conversion is done by the chaining code. +

+ Using the ChainMapper and the ChainReducer classes it is possible to compose Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An immediate benefit of this pattern is a dramatic reduction in disk IO. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. +

    + ChainReducer usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    + 

    + JobConf mapAConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, + Text.class, Text.class, true, mapAConf); +

    + JobConf mapBConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, mapBConf); +

    + JobConf reduceConf = new JobConf(false); + ... + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, + Text.class, Text.class, true, reduceConf); +

    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, null); +

    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, + LongWritable.class, LongWritable.class, true, null); +

    + FileInputFormat.setInputPaths(conf, inDir); + FileOutputFormat.setOutputPath(conf, outDir); + ... +

    + JobClient jc = new JobClient(conf); + RunningJob job = jc.submitJob(conf); + ... +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RecordReader's for CombineFileSplit's. + @see CombineFileSplit]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + th Path]]> + + + + + + th Path]]> + + + + + + + + + + + th Path]]> + + + + + + + + + + + + + + + + + + + + + + + + + + CombineFileSplit can be used to implement {@link org.apache.hadoop.mapred.RecordReader}'s, + with reading one record per file. + @see org.apache.hadoop.mapred.FileSplit + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + @param freq The frequency with which records will be emitted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + This will read every split at the client, which is very expensive. + @param freq Probability with which a key will be chosen. + @param numSamples Total number of samples to obtain from all selected + splits.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + Takes the first numSamples / numSplits records from each split. + @param numSamples Total number of samples to obtain from all selected + splits.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the name output is multi, false + if it is single. If the name output is not defined it returns + false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @param conf job conf to add the named output + @param namedOutput named output name, it has to be a word, letters + and numbers only, cannot be the word 'part' as + that is reserved for the + default output. + @param outputFormatClass OutputFormat class. + @param keyClass key class + @param valueClass value class]]> + + + + + + + + + + + + @param conf job conf to add the named output + @param namedOutput named output name, it has to be a word, letters + and numbers only, cannot be the word 'part' as + that is reserved for the + default output. + @param outputFormatClass OutputFormat class. + @param keyClass key class + @param valueClass value class]]> + + + + + + + + By default these counters are disabled. +

+ MultipleOutputs supports counters; by default they are disabled. The counters group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi named outputs the name of the counter is the concatenation of the named output, an underscore '_', and the multiname. + + @param conf job conf in which to enable the counters. + @param enabled indicates if the counters will be enabled or not.]]> +
    +
    + + + + + By default these counters are disabled. +

+ MultipleOutputs supports counters; by default they are disabled. The counters group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi named outputs the name of the counter is the concatenation of the named output, an underscore '_', and the multiname. + + @param conf job conf to check. + @return TRUE if the counters are enabled, FALSE if they are disabled.]]> +
    +
    + + + + + + + + + + + + + @param namedOutput the named output name + @param reporter the reporter + @return the output collector for the given named output + @throws IOException thrown if output collector could not be created]]> + + + + + + + + + + + @param namedOutput the named output name + @param multiName the multi name part + @param reporter the reporter + @return the output collector for the given named output + @throws IOException thrown if output collector could not be created]]> + + + + + + + If overriden subclasses must invoke super.close() at the + end of their close() + + @throws java.io.IOException thrown if any of the MultipleOutput files + could not be closed properly.]]> + + + + OutputCollector passed to + the map() and reduce() methods of the + Mapper and Reducer implementations. +

    + Each additional output, or named output, may be configured with its own + OutputFormat, with its own key class and with its own value + class. +

+ A named output can be a single file or a multi file. The latter is referred to as a multi named output. +

+ A multi named output is an unbounded set of files all sharing the same OutputFormat, key class and value class configuration. +

+ When named outputs are used within a Mapper implementation, key/values written to a named output are not part of the reduce phase; only key/values written to the job OutputCollector are part of the reduce phase. +

+ MultipleOutputs supports counters; by default they are disabled. The counters group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi named outputs the name of the counter is the concatenation of the named output, an underscore '_', and the multiname. +
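+ As a rough illustration of the counter behaviour described above (a sketch, not part of the original javadoc; the output names are assumed to match the examples below), counters can be switched on with setCountersEnabled:
+
+   JobConf conf = new JobConf();
+   // enable one counter per named output; the counter group is the MultipleOutputs class name
+   MultipleOutputs.setCountersEnabled(conf, true);
+   // for a multi named output "seq" written with multiname "A", the counter name would be "seq_A"
+   boolean countersOn = MultipleOutputs.getCountersEnabled(conf);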

    + Job configuration usage pattern is: +

    +
    + JobConf conf = new JobConf();
    +
    + conf.setInputPath(inDir);
    + FileOutputFormat.setOutputPath(conf, outDir);
    +
    + conf.setMapperClass(MOMap.class);
    + conf.setReducerClass(MOReduce.class);
    + ...
    +
    + // Defines additional single text based output 'text' for the job
    + MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
    + LongWritable.class, Text.class);
    +
    + // Defines additional multi sequencefile based output 'sequence' for the
    + // job
    + MultipleOutputs.addMultiNamedOutput(conf, "seq",
    +   SequenceFileOutputFormat.class,
    +   LongWritable.class, Text.class);
    + ...
    +
    + JobClient jc = new JobClient();
    + RunningJob job = jc.submitJob(conf);
    +
    + ...
    + 
    +

+ Usage pattern in a Reducer implementation is: +

    +
    + public class MOReduce implements
+   Reducer<WritableComparable, Writable, WritableComparable, Writable> {
    + private MultipleOutputs mos;
    +
    + public void configure(JobConf conf) {
    + ...
    + mos = new MultipleOutputs(conf);
    + }
    +
    + public void reduce(WritableComparable key, Iterator<Writable> values,
    + OutputCollector output, Reporter reporter)
    + throws IOException {
    + ...
    + mos.getCollector("text", reporter).collect(key, new Text("Hello"));
    + mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye"));
    + mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau"));
    + ...
    + }
    +
    + public void close() throws IOException {
    + mos.close();
    + ...
    + }
    +
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used instead of the default implementation, + @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + bound in order to improve throughput. +

    + Map implementations using this MapRunnable must be thread-safe. +

+ The Map-Reduce job has to be configured to use this MapRunnable class (using the JobConf.setMapRunnerClass method) and the number of threads the thread-pool can use with the mapred.map.multithreadedrunner.threads property; its default value is 10 threads. +
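+ A minimal configuration sketch, assuming an existing job setup (MyJob is a placeholder driver class and the thread count of 20 is only an example):
+
+   JobConf conf = new JobConf(MyJob.class);
+   conf.setMapRunnerClass(org.apache.hadoop.mapred.lib.MultithreadedMapRunner.class);
+   // override the default pool size of 10 threads
+   conf.setInt("mapred.map.multithreadedrunner.threads", 20);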

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + pairs. Uses + {@link StringTokenizer} to break text into tokens. + @deprecated Use + {@link org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper} instead.]]> + + + + + + + + + + + + total.order.partitioner.natural.order is not false, a trie + of the first total.order.partitioner.max.trie.depth(2) + 1 bytes + will be built. Otherwise, keys will be located using a binary search of + the partition keyset using the {@link org.apache.hadoop.io.RawComparator} + defined for this job. The input file must be sorted with the same + comparator and contain {@link + org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys.]]> + + + + + + + + + + + + R reduces, there are R-1 + keys in the SequenceFile.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + generateKeyValPairs(Object key, Object value); public void + configure(JobConfjob); } + + The package also provides a base class, ValueAggregatorBaseDescriptor, + implementing the above interface. The user can extend the base class and + implement generateKeyValPairs accordingly. + + The primary work of generateKeyValPairs is to emit one or more key/value + pairs based on the input key/value pair. The key in an output key/value pair + encode two pieces of information: aggregation type and aggregation id. The + value will be aggregated onto the aggregation id according the aggregation + type. + + This class offers a function to generate a map/reduce job using Aggregate + framework. The function takes the following parameters: input directory spec + input format (text or sequence file) output directory a file specifying the + user plugin class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The job can be configured using the static methods in this class, + {@link DBInputFormat}, and {@link DBOutputFormat}. +

    + Alternatively, the properties can be set in the configuration with proper + values. + + @see DBConfiguration#configureDB(JobConf, String, String, String, String) + @see DBInputFormat#setInput(JobConf, Class, String, String) + @see DBInputFormat#setInput(JobConf, Class, String, String, String, String...) + @see DBOutputFormat#setOutput(JobConf, String, String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 20070101 AND length > 0)' + @param orderBy the fieldNames in the orderBy clause. + @param fieldNames The field names in the table + @see #setInput(JobConf, Class, String, String)]]> + + + + + + + + + + + + + + DBInputFormat emits LongWritables containing the record number as + key and DBWritables as value. + + The SQL query, and input class can be using one of the two + setInput methods.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {@link DBOutputFormat} accepts <key,value> pairs, where + key has a type extending DBWritable. Returned {@link RecordWriter} + writes only the key to the database with a batch SQL query.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DBWritable. DBWritable, is similar to {@link Writable} + except that the {@link #write(PreparedStatement)} method takes a + {@link PreparedStatement}, and {@link #readFields(ResultSet)} + takes a {@link ResultSet}. +

    + Implementations are responsible for writing the fields of the object + to PreparedStatement, and reading the fields of the object from the + ResultSet. + +

    Example:

    + If we have the following table in the database : +
    + CREATE TABLE MyTable (
    +   counter        INTEGER NOT NULL,
+   timestamp      BIGINT  NOT NULL
    + );
    + 
    + then we can read/write the tuples from/to the table with : +

    + public class MyWritable implements Writable, DBWritable {
    +   // Some data     
    +   private int counter;
    +   private long timestamp;
    +       
    +   //Writable#write() implementation
    +   public void write(DataOutput out) throws IOException {
    +     out.writeInt(counter);
    +     out.writeLong(timestamp);
    +   }
    +       
    +   //Writable#readFields() implementation
    +   public void readFields(DataInput in) throws IOException {
    +     counter = in.readInt();
    +     timestamp = in.readLong();
    +   }
    +       
    +   public void write(PreparedStatement statement) throws SQLException {
    +     statement.setInt(1, counter);
    +     statement.setLong(2, timestamp);
    +   }
    +       
    +   public void readFields(ResultSet resultSet) throws SQLException {
    +     counter = resultSet.getInt(1);
    +     timestamp = resultSet.getLong(2);
    +   } 
    + }
    + 
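+ A job using MyWritable might then be wired up roughly as follows (a sketch only; the driver class, JDBC driver, connection URL and credentials are assumptions, not part of the original javadoc):
+
+   JobConf job = new JobConf(MyDbJob.class);   // MyDbJob is a placeholder driver class
+   DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver",
+       "jdbc:mysql://localhost/mydb", "user", "password");
+   // read MyTable ordered by counter, pulling the two declared columns
+   DBInputFormat.setInput(job, MyWritable.class, "MyTable",
+       null /* conditions */, "counter", "counter", "timestamp");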

    ]]> +
    + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter is named by + an {@link Enum} and has a long for the value.

    + +

    Counters are bunched into Groups, each comprising of + counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

Note: The split is a logical split of the inputs and the input files are not physically split into chunks. For example, a split could be a <input-file-path, start, offset> tuple. The InputFormat also creates the {@link RecordReader} to read the {@link InputSplit}. + + @param context job configuration. + @return an array of {@link InputSplit}s for the job.]]> +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
+ 1. Validate the input-specification of the job.
+ 2. Split-up the input file(s) into logical {@link InputSplit}s, each of which is then assigned to an individual {@link Mapper}.
+ 3. Provide the {@link RecordReader} implementation to be used to glean input records from the logical InputSplit for processing by the {@link Mapper}.
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

Clearly, logical splits based on input-size are insufficient for many applications since record boundaries are to be respected. In such cases, the application also has to implement a {@link RecordReader} on which lies the responsibility to respect record-boundaries and present a record-oriented view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see FileInputFormat]]> +
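+ As a small illustration of wiring an InputFormat into a job (a sketch; the input path "/data/input" is hypothetical):
+
+   Job job = new Job(new Configuration(), "input format example");
+   job.setInputFormatClass(TextInputFormat.class);              // a file-based, line-oriented InputFormat
+   FileInputFormat.addInputPath(job, new Path("/data/input"));  // hypothetical input directory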

Typically, it presents a byte-oriented view on the input, and it is the responsibility of the {@link RecordReader} of the job to process this and present a record-oriented view. + + @see InputFormat + @see RecordReader]]> +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID + @see org.apache.hadoop.mapred.JobTracker#getNewJobId() + @see org.apache.hadoop.mapred.JobTracker#getStartTime()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + the key input type to the Mapper + @param the value input type to the Mapper + @param the key output type from the Mapper + @param the value output type from the Mapper]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Maps are the individual tasks which transform input records into a + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link Configuration} for + the job via the {@link JobContext#getConfiguration()}. + +

    The framework first calls + {@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by + {@link #map(Object, Object, Context)} + for each key/value pair in the InputSplit. Finally + {@link #cleanup(Context)} is called.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the sorting and grouping by + specifying two key {@link RawComparator} classes.

    + +

    The Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link Job#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +
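+ For instance (a sketch only, reusing the mapper and reducer classes from the examples in this documentation), a combiner and partitioner are set on the job like this:
+
+   job.setMapperClass(TokenCounterMapper.class);
+   job.setCombinerClass(IntSumReducer.class);       // combiner performs local aggregation of map outputs
+   job.setReducerClass(IntSumReducer.class);
+   job.setPartitionerClass(HashPartitioner.class);  // the default hash partitioning, shown explicitly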

    Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the Configuration.

    + +

    If the job has zero + reduces then the output of the Mapper is directly written + to the {@link OutputFormat} without sorting by keys.

    + +

    Example:

    +

    + public class TokenCounterMapper 
+     extends Mapper<Object, Text, Text, IntWritable>{
    +    
    +   private final static IntWritable one = new IntWritable(1);
    +   private Text word = new Text();
    +   
+   public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    +     StringTokenizer itr = new StringTokenizer(value.toString());
    +     while (itr.hasMoreTokens()) {
    +       word.set(itr.nextToken());
+       context.write(word, one);
    +     }
    +   }
    + }
    + 

    + +

    Applications may override the {@link #run(Context)} method to exert + greater control on map processing e.g. multi-threaded Mappers + etc.

    + + @see InputFormat + @see JobContext + @see Partitioner + @see Reducer]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OutputCommitter describes the commit of task output for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputCommitter of + the job to:

    +

      +
+ 1. Setup the job during initialization. For example, create the temporary output directory for the job during the initialization of the job.
+ 2. Cleanup the job after the job completion. For example, remove the temporary output directory after the job completion.
+ 3. Setup the task temporary output.
+ 4. Check whether a task needs a commit. This is to avoid the commit procedure if a task does not need commit.
+ 5. Commit of the task output.
+ 6. Discard the task commit.
    + + @see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter + @see JobContext + @see TaskAttemptContext]]> +
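+ A minimal no-op sketch of the responsibilities listed above (illustrative only, not part of the original javadoc):
+
+   public class NoOpOutputCommitter extends OutputCommitter {
+     public void setupJob(JobContext jobContext) throws IOException { }      // e.g. create the temporary output dir
+     public void cleanupJob(JobContext jobContext) throws IOException { }    // e.g. remove the temporary output dir
+     public void setupTask(TaskAttemptContext taskContext) throws IOException { }
+     public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException { return false; }
+     public void commitTask(TaskAttemptContext taskContext) throws IOException { }
+     public void abortTask(TaskAttemptContext taskContext) throws IOException { }
+   }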
    +
+ + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when the job is submitted. Typically checks that it does not already exist, throwing an exception when it already exists, so that output is not overwritten.

    + + @param context information about the job + @throws IOException when output should not be attempted]]> +
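+ A hedged sketch of such a check for a file-based output (the class names are standard Hadoop ones; the message wording is illustrative):
+
+   public void checkOutputSpecs(JobContext job) throws IOException {
+     Path outDir = FileOutputFormat.getOutputPath(job);
+     if (outDir == null) {
+       throw new InvalidJobConfException("Output directory not set.");
+     }
+     if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
+       // refuse to run rather than silently overwrite existing output
+       throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
+     }
+   }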
    +
    + + + + + + + + + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
+ 1. Validate the output-specification of the job. For example, check that the output directory doesn't already exist.
+ 2. Provide the {@link RecordWriter} implementation to be used to write out the output files of the job. Output files are stored in a {@link FileSystem}.
    + + @see RecordWriter]]> +
    +
+ + + + + + + + + + + Typically a hash function on all or a subset of the key.

+ + @param key the key to be partitioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + + @see Reducer]]> +
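+ A small illustrative Partitioner (a sketch, mirroring the hash-based behaviour described above; the class name is made up):
+
+   public class WordPartitioner extends Partitioner<Text, IntWritable> {
+     @Override
+     public int getPartition(Text key, IntWritable value, int numPartitions) {
+       // mask the sign bit so the partition index is always non-negative
+       return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
+     }
+   }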
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @param ]]> + + + + + + + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param context the context of the task + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the input keys + @param the class of the input values + @param the class of the output keys + @param the class of the output values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Reducer implementations + can access the {@link Configuration} for the job via the + {@link JobContext#getConfiguration()} method.

    + +

    Reducer has 3 primary phases:

+ 1. Shuffle
+    The Reducer copies the sorted output from each {@link Mapper} using HTTP across the network.
+
+ 2. Sort
+    The framework merge sorts Reducer inputs by keys (since different Mappers may have output the same key).
+    The shuffle and sort phases occur simultaneously, i.e. while outputs are being fetched they are merged.
+
+    SecondarySort
+    To achieve a secondary sort on the values returned by the value iterator, the application should extend the key with the secondary key and define a grouping comparator. The keys will be sorted using the entire key, but will be grouped using the grouping comparator to decide which keys and values are sent in the same call to reduce. The grouping comparator is specified via {@link Job#setGroupingComparatorClass(Class)}. The sort order is controlled by {@link Job#setSortComparatorClass(Class)}. A configuration sketch for this pattern follows this list.
+
+    For example, say that you want to find duplicate web pages and tag them all with the url of the "best" known example. You would set up the job like:
+    • Map Input Key: url
+    • Map Input Value: document
+    • Map Output Key: document checksum, url pagerank
+    • Map Output Value: url
+    • Partitioner: by checksum
+    • OutputKeyComparator: by checksum and then decreasing pagerank
+    • OutputValueGroupingComparator: by checksum
+
+ 3. Reduce
+    In this phase the {@link #reduce(Object, Iterable, Context)} method is called for each <key, (collection of values)> in the sorted inputs.
+    The output of the reduce task is typically written to a {@link RecordWriter} via {@link Context#write(Object, Object)}.
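+ The configuration sketch referred to above (the three partitioner/comparator classes are hypothetical placeholders for the checksum-based logic, not real Hadoop classes):
+
+   job.setPartitionerClass(ChecksumPartitioner.class);                 // partition by checksum
+   job.setSortComparatorClass(ChecksumThenPageRankComparator.class);   // checksum, then decreasing pagerank
+   job.setGroupingComparatorClass(ChecksumGroupingComparator.class);   // group values by checksum only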
    + +

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

+ public class IntSumReducer<Key> extends Reducer<Key, IntWritable, Key, IntWritable> {
    +   private IntWritable result = new IntWritable();
    + 
+   public void reduce(Key key, Iterable<IntWritable> values,
+                      Context context) throws IOException, InterruptedException {
    +     int sum = 0;
    +     for (IntWritable val : values) {
    +       sum += val.get();
    +     }
    +     result.set(sum);
+     context.write(key, result);
    +   }
    + }
    + 

    + + @see Mapper + @see Partitioner]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the input key type for the task + @param the input value type for the task + @param the output key type for the task + @param the output value type for the task]]> + + + + + + + + + + + + + + + + + + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param context the job context + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobContext)}. + Subclasses of FileInputFormat can also override the + {@link #isSplitable(JobContext, Path)} method to ensure input-files are + not split-up and are processed as a whole by {@link Mapper}s.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the map's input key type + @param the map's input value type + @param the map's output key type + @param the map's output value type + @param job the job + @return the mapper class to run]]> + + + + + + + the map input key type + @param the map input value type + @param the map output key type + @param the map output value type + @param job the job to modify + @param cls the class to use as the mapper]]> + + + + + + + + + + + + + It can be used instead of the default implementation, + @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + bound in order to improve throughput. +

    + Mapper implementations using this MapRunnable must be thread-safe. +

+ The Map-Reduce job has to be configured with the mapper to use via {@link #setMapperClass(Configuration, Class)} and the number of threads the thread-pool can use with the {@link #getNumberOfThreads(Configuration)} method. The default value is 10 threads. +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapred.output.dir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapred.output.dir}/_temporary/_${taskid} (only) + are promoted to ${mapred.output.dir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

The application-writer can take advantage of this by creating any side-files required in a work directory during execution of the task, i.e. via {@link #getWorkOutputPath(TaskInputOutputContext)}, and the framework will move them out similarly - thus they don't have to pick unique paths per task-attempt.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
    + + + + + + + + + The path can be used to create custom files from within the map and + reduce tasks. The path name will be unique for each task. The path parent + will be the job output directory.

+ +

    This method uses the {@link #getUniqueFile} method to make the file name + unique for the task.

+ + @param context the context for the task. + @param name the name for the file. + @param extension the extension for the file + @return a unique path across all tasks of the job.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This tool supports archiving and anaylzing (sort/grep) of log-files. + It takes as input + a) Input uri which will serve uris of the logs to be archived. + b) Output directory (not mandatory). + b) Directory on dfs to archive the logs. + c) The sort/grep patterns for analyzing the files and separator for boundaries. + Usage: + Logalyzer -archive -archiveDir -analysis -logs -grep -sort -separator +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/aarch64/share/hadoop/common/jdiff/hadoop_0.20.1.xml b/aarch64/share/hadoop/common/jdiff/hadoop_0.20.1.xml new file mode 100644 index 0000000..fc05639 --- /dev/null +++ b/aarch64/share/hadoop/common/jdiff/hadoop_0.20.1.xml @@ -0,0 +1,53832 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param in InputStream to deserialize the object from.]]> + + + + + + + + + + + name property, null if + no such property exists. + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + name property, without doing + variable expansion. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + + value of the name property. + + @param name property name. + @param value property value.]]> + + + + + + + + + + + + + + name property. If no such property + exists, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, or if the specified value is not a valid + int, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as an int, + or defaultValue.]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property is specified, or if the specified value is not a valid + long, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property is specified, or if the specified value is not a valid + float, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a float, + or defaultValue.]]> + + + + + + + name property to a float. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + + + + + + + + name property as + a collection of Strings. 
+ If no such property is specified then empty collection is returned. +

    + This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

      +
+ 1. core-default.xml: Read-only defaults for hadoop.
+ 2. core-site.xml: Site-specific configuration for a given hadoop installation.
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    +  <property>
    +    <name>dfs.client.buffer.dir</name>
    +    <value>/tmp/hadoop/dfs/client</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + core-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
+ 1. Other properties defined in this Configuration; and, if a name is undefined here,
+ 2. Properties in {@link System#getProperties()}.
    + +

    For example, if a configuration resource contains the following property + definitions: +

    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    + + When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DistributedCache is a facility provided by the Map-Reduce + framework to cache files (text, archives, jars etc.) needed by applications. +

    + +

    Applications specify the files, via urls (hdfs:// or http://) to be cached + via the {@link org.apache.hadoop.mapred.JobConf}. + The DistributedCache assumes that the + files specified via hdfs:// urls are already present on the + {@link FileSystem} at the path specified by the url.

    + +

    The framework will copy the necessary files on to the slave node before + any tasks for the job are executed on that node. Its efficiency stems from + the fact that the files are only copied once per job and the ability to + cache archives which are un-archived on the slaves.

    + +

    DistributedCache can be used to distribute simple, read-only + data/text files and/or more complex types such as archives, jars etc. + Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. + Jars may be optionally added to the classpath of the tasks, a rudimentary + software distribution mechanism. Files have execution permissions. + Optionally users can also direct it to symlink the distributed cache file(s) + into the working directory of the task.

    + +

    DistributedCache tracks modification timestamps of the cache + files. Clearly the cache files should not be modified by the application + or externally while the job is executing.

    + +

    Here is an illustrative example on how to use the + DistributedCache:

    +

    +     // Setting up the cache for the application
    +     
    +     1. Copy the requisite files to the FileSystem:
    +     
    +     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
    +     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
    +     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
    +     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
    +     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
    +     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
    +     
    +     2. Setup the application's JobConf:
    +     
    +     JobConf job = new JobConf();
    +     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
    +                                   job);
+     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
    +     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
    +     
    +     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
    +     or {@link org.apache.hadoop.mapred.Reducer}:
    +     
    +     public static class MapClass extends MapReduceBase  
    +     implements Mapper<K, V, K, V> {
    +     
    +       private Path[] localArchives;
    +       private Path[] localFiles;
    +       
    +       public void configure(JobConf job) {
    +         // Get the cached archives/files
    +         localArchives = DistributedCache.getLocalCacheArchives(job);
    +         localFiles = DistributedCache.getLocalCacheFiles(job);
    +       }
    +       
    +       public void map(K key, V value, 
    +                       OutputCollector<K, V> output, Reporter reporter) 
    +       throws IOException {
    +         // Use data from the cached archives/files here
    +         // ...
    +         // ...
    +         output.collect(k, v);
    +       }
    +     }
    +     
    + 

    + + @see org.apache.hadoop.mapred.JobConf + @see org.apache.hadoop.mapred.JobClient]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BufferedFSInputStream + with the specified buffer size, + and saves its argument, the input stream + in, for later use. An internal + buffer array of length size + is created and stored in buf. + + @param in the underlying input stream. + @param size the buffer size. + @exception IllegalArgumentException if size <= 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    ?             Matches any single character.

    *             Matches zero or more characters.

    [abc]         Matches a single character from character set {a,b,c}.

    [a-b]         Matches a single character from the character range {a...b}.
                  Note that character a must be lexicographically less than or
                  equal to character b.

    [^a]          Matches a single character that is not from character set or
                  range {a}. Note that the ^ character must occur immediately
                  to the right of the opening bracket.

    \c            Removes (escapes) any special meaning of character c.

    {ab,cd}       Matches a string from the string set {ab, cd}.

    {ab,c{de,fh}} Matches a string from the string set {ab, cde, cfh}.
+ + @param pathPattern a regular expression specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException]]> +
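A small sketch of applying this pattern syntax through FileSystem#globStatus, which accepts the same glob patterns (the /logs paths are illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class GlobExample {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // matches e.g. /logs/2014-02-01.log and /logs/2014-02-02.log
        FileStatus[] matches = fs.globStatus(new Path("/logs/2014-02-{01,02}.log"));
        if (matches != null) {
          for (FileStatus status : matches) {
            System.out.println(status.getPath());
          }
        }
      }
    }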
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object. The + Hadoop DFS is a multi-machine system that appears as a single + disk. It's useful because of its fault tolerance and potentially + very large capacity. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + buf at offset + and checksum into checksum. + The method is used for implementing read, therefore, it should be optimized + for sequential reading + @param pos chunkPos + @param buf desitination buffer + @param offset offset in buf at which to store data + @param len maximun number of bytes to read + @return number of bytes read]]> + + + + + + + + + + + + + + + + + -1 if the end of the + stream is reached. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + This method implements the general contract of the corresponding + {@link InputStream#read(byte[], int, int) read} method of + the {@link InputStream} class. As an additional + convenience, it attempts to read as many bytes as possible by repeatedly + invoking the read method of the underlying stream. This + iterated read continues until one of the following + conditions becomes true:

      + +
    • The specified number of bytes have been read, + +
    • The read method of the underlying stream returns + -1, indicating end-of-file. + +
    If the first read on the underlying stream returns + -1 to indicate end-of-file then this method returns + -1. Otherwise this method returns the number of bytes + actually read. + + @param b destination buffer. + @param off offset at which to start storing bytes. + @param len maximum number of bytes to read. + @return the number of bytes read, or -1 if the end of + the stream has been reached. + @exception IOException if an I/O error occurs. + ChecksumException if any checksum error occurs]]> +
    + + + + + + + + + + + + + + + + + + n bytes of data from the + input stream. + +

    This method may skip more bytes than are remaining in the backing + file. This produces no exception and the number of bytes skipped + may include some number of bytes that were beyond the EOF of the + backing file. Attempting to read from the stream after skipping past + the end will result in -1 indicating the end of the file. + +

    If n is negative, no bytes are skipped. + + @param n the number of bytes to be skipped. + @return the actual number of bytes skipped. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to skip to is corrupted]]> + + + + + + + This method may seek past the end of the file. + This produces no exception and an attempt to read from + the stream will result in -1 indicating the end of the file. + + @param pos the postion to seek to. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to seek to is corrupted]]> + + + + + + + + + + len bytes from + stm + + @param stm an input stream + @param buf destiniation buffer + @param offset offset at which to store data + @param len number of bytes to read + @return actual number of bytes read + @throws IOException if there is any IO error]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + len bytes from the specified byte array + starting at offset off and generate a checksum for + each data chunk. + +

    This method stores bytes from the given array into this + stream's buffer before it gets checksumed. The buffer gets checksumed + and flushed to the underlying output stream when all data + in a checksum chunk are in the buffer. If the buffer is empty and + requested length is at least as large as the size of next checksum chunk + size, this method will checksum and write the chunk directly + to the underlying output stream. Thus it avoids uneccessary data copy. + + @param b the data. + @param off the start offset in the data. + @param len the number of bytes to write. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trash feature. Files are moved to a user's trash + directory, a subdirectory of their home directory named ".Trash". Files are + initially moved to a current sub-directory of the trash directory. + Within that sub-directory their original path is preserved. Periodically + one may checkpoint the current trash and remove older checkpoints. (This + design permits trash management without enumeration of the full trash + content, without date support in the filesystem, and without clock + synchronization.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a tool for migrating data from an older to a newer version + of an S3 filesystem. +

    +

    + All files in the filesystem are migrated by re-writing the block metadata + - no datafiles are touched. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + Extracts AWS credentials from the filesystem URI or configuration. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A block-based {@link FileSystem} backed by + Amazon S3. +

    + @see NativeS3FileSystem]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If f is a file, this method will make a single call to S3. + If f is a directory, this method will make a maximum of + (n / 1000) + 2 calls to S3, where n is the total number of + files and directories contained directly in f. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} for reading and writing files stored on + Amazon S3. + Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation + stores files on S3 in their + native form so they can be read by other S3 tools. +

    + @see org.apache.hadoop.fs.s3.S3FileSystem]]> +
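A hedged sketch of opening an object through the native S3 filesystem; the bucket, object key and credential values are placeholders, and the fs.s3n.* property names are the ones conventionally used for s3n:// URIs:

    import java.io.InputStream;
    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IOUtils;

    public class S3nExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.s3n.awsAccessKeyId", "ACCESS_KEY");      // placeholder credential
        conf.set("fs.s3n.awsSecretAccessKey", "SECRET_KEY");  // placeholder credential
        FileSystem fs = FileSystem.get(URI.create("s3n://my-bucket/"), conf);
        InputStream in = fs.open(new Path("s3n://my-bucket/data/part-00000"));
        try {
          // objects are stored in their native form, so other S3 tools can read them too
          IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
          in.close();
        }
      }
    }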
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . + @param name The name of the server + @param port The port to use on the server + @param findPort whether the server should start at the given port and + increment by 1 until it finds a free port. + @param conf Configuration]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + points to the log directory + "/static/" -> points to common static files (src/webapps/static) + "/" -> the jsp server code from (src/webapps/)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nth value.]]> + + + + + + + + + + + + + + + + + + + + + nth value in the file.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataInputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + DataInputBuffer buffer = new DataInputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using DataInput methods ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataOutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + DataOutputBuffer buffer = new DataOutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using DataOutput methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

+ Compared with ObjectWritable, this class is much more efficient, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines + the classes that will be wrapped in GenericObject in the application. + Attention: the classes defined in the getTypes() method must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new InputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + InputBuffer buffer = new InputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using InputStream methods ...
    + }
    + 
    + @see DataInputBuffer + @see DataOutput]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link IOException} or + null pointers. Must only be used for cleanup in exception handlers. + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key and + val. Returns true if such a pair exists and false when at + the end of the map]]> + + + + + + + + + + + + + + + + key or if it does not exist, at the first entry + after the named key. + +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof.]]> + + + + + + + + + key does not exist, return + the first entry that falls just before the key. Otherwise, + return the record that sorts just after. + @return - the key that was the closest match or null if eof.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new OutputStream and + ByteArrayOutputStream each time data is written. + +
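A short sketch of creating and probing a MapFile, assuming a writable /tmp/example.map directory; note that entries must be appended in key order:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.MapFile;
    import org.apache.hadoop.io.Text;

    public class MapFileExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path dir = new Path("/tmp/example.map");

        MapFile.Writer writer = new MapFile.Writer(conf, dir,
            MapFile.Writer.keyClass(Text.class),
            MapFile.Writer.valueClass(Text.class));
        writer.append(new Text("apple"), new Text("fruit"));     // keys added in order
        writer.append(new Text("carrot"), new Text("vegetable"));
        writer.close();

        MapFile.Reader reader = new MapFile.Reader(dir, conf);
        Text value = new Text();
        reader.get(new Text("carrot"), value);  // index lookup, then scan within the data file
        System.out.println(value);
        reader.close();
      }
    }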

    Typical usage is something like the following:

    +
    + OutputBuffer buffer = new OutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using OutputStream methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    + @see DataOutputBuffer + @see InputBuffer]]> +
    +
    + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link Writer}, {@link Reader} and + {@link Sorter} classes for writing, reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
1. Writer : Uncompressed records.
2. RecordCompressWriter : Record-compressed files, only compress values.
3. BlockCompressWriter : Block-compressed files, both keys & values are
   collected in 'blocks' separately and compressed. The size of the 'block'
   is configurable.
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

The recommended way is to use the static createWriter methods + provided by the SequenceFile to choose the preferred format.

    + +

    The {@link Reader} acts as the bridge and can read any of the above + SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
• version - 3 bytes of magic header SEQ, followed by 1 byte of actual version number (e.g. SEQ4 or SEQ6)
• keyClassName - key class
• valueClassName - value class
• compression - A boolean which specifies if compression is turned on for keys/values in this file.
• blockCompression - A boolean which specifies if block-compression is turned on for keys/values in this file.
• compression codec - CompressionCodec class which is used for compression of keys and/or values (if compression is enabled).
• metadata - {@link Metadata} for this file.
• sync - A sync marker to denote end of the header.

Uncompressed SequenceFile Format
• Header
• Record
  • Record length
  • Key length
  • Key
  • Value
• A sync-marker every few 100 bytes or so.

Record-Compressed SequenceFile Format
• Header
• Record
  • Record length
  • Key length
  • Key
  • Compressed Value
• A sync-marker every few 100 bytes or so.

Block-Compressed SequenceFile Format
• Header
• Record Block
  • Compressed key-lengths block-size
  • Compressed key-lengths block
  • Compressed keys block-size
  • Compressed keys block
  • Compressed value-lengths block-size
  • Compressed value-lengths block
  • Compressed values block-size
  • Compressed values block
• A sync-marker every few 100 bytes or so.
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
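An illustrative sketch using the static createWriter factory and the Reader described above; the path, key/value types and record contents are placeholders:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;

    public class SequenceFileExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.seq");

        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(path),
            SequenceFile.Writer.keyClass(IntWritable.class),
            SequenceFile.Writer.valueClass(Text.class),
            SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK));
        for (int i = 0; i < 3; i++) {
          writer.append(new IntWritable(i), new Text("record-" + i));
        }
        writer.close();

        SequenceFile.Reader reader =
            new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
        IntWritable key = new IntWritable();
        Text value = new Text();
        while (reader.next(key, value)) {  // the Reader handles any of the three formats
          System.out.println(key + "\t" + value);
        }
        reader.close();
      }
    }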
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key, skipping its + value. True if another entry exists, and false at end of file.]]> + + + + + + + + key and + val. Returns true if such a pair exists and false when at + end of file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The position passed must be a position returned by {@link + SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + position, use {@link SequenceFile.Reader#sync(long)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SegmentDescriptor + @param segments the list of SegmentDescriptors + @param tmpDir the directory to write temporary files into + @return RawKeyValueIterator + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For best performance, applications should make sure that the {@link + Writable#readFields(DataInput)} implementation of their keys is + very efficient. In particular, it should avoid allocating memory.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This always returns a synchronized position. In other words, + immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + the key may be earlier in the file than key last written when this + method was called (e.g., with block-compression, it may be the first key + in the block that was being written when this method was called).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key. Returns + true if such a key exists and false when at the end of the set.]]> + + + + + + + key. + Returns key, or null if no match exists.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instatiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. 
+ @return byte position of the first occurence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a UTF8 with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + Also includes utilities for efficiently reading and writing UTF-8. + + @deprecated replaced by Text]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

+ + @param in DataInput to deserialize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +

    +     public class MyWritable implements Writable {
    +       // Some data     
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 

    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Example:

    +

    +     public class MyWritableComparable implements WritableComparable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable w) {
+         int thisValue = this.counter;
+         int thatValue = w.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +     }
    + 

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implemenation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. 
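A hedged sketch of such an optimized comparator for the MyWritableComparable example shown earlier, whose serialized form begins with the 4-byte counter; it assumes that example class is available on the classpath:

    import org.apache.hadoop.io.WritableComparator;

    public class MyRawComparator extends WritableComparator {
      protected MyRawComparator() {
        super(MyWritableComparable.class);
      }

      @Override
      public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Compare the leading int (the counter) directly from the serialized
        // bytes, avoiding deserialization of the whole record.
        int thisCounter = readInt(b1, s1);
        int thatCounter = readInt(b2, s2);
        return (thisCounter < thatCounter ? -1 : (thisCounter == thatCounter ? 0 : 1));
      }
    }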
+ @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FIXME: This array should be in a private or package private location, + since it could be modified by malicious code. +

    ]]> +
    + + + + This interface is public for historical purposes. You should have no need to + use it. +

    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Although BZip2 headers are marked with the magic "Bz" this + constructor expects the next byte in the stream to be the first one after + the magic. Thus callers have to skip the first two bytes. Otherwise this + constructor will throw an exception. +

    + + @throws IOException + if the stream content is malformed or an I/O error occurs. + @throws NullPointerException + if in == null]]> +
    +
    + + + + + + + + + + + + + + + The decompression requires large amounts of memory. Thus you should call the + {@link #close() close()} method as soon as possible, to force + CBZip2InputStream to release the allocated memory. See + {@link CBZip2OutputStream CBZip2OutputStream} for information about memory + usage. +

    + +

+ CBZip2InputStream reads bytes from the compressed source stream via + the single byte {@link java.io.InputStream#read() read()} method exclusively. + Thus you should consider using a buffered source stream. +

    + +

    + Instances of this class are not threadsafe. +

    ]]> +
    +
    + + + + + + + + CBZip2OutputStream with a blocksize of 900k. + +

+ Attention: The caller is responsible for writing the two BZip2 magic + bytes "BZ" to the specified stream prior to calling this + constructor. +

    + + @param out * + the destination stream. + + @throws IOException + if an I/O error occurs in the specified stream. + @throws NullPointerException + if out == null.]]> +
    +
    + + + + CBZip2OutputStream with specified blocksize. + +

+ Attention: The caller is responsible for writing the two BZip2 magic + bytes "BZ" to the specified stream prior to calling this + constructor. +

    + + + @param out + the destination stream. + @param blockSize + the blockSize as 100k units. + + @throws IOException + if an I/O error occurs in the specified stream. + @throws IllegalArgumentException + if (blockSize < 1) || (blockSize > 9). + @throws NullPointerException + if out == null. + + @see #MIN_BLOCKSIZE + @see #MAX_BLOCKSIZE]]> +
    +
    + + + + + + + + + + + + + inputLength this method returns MAX_BLOCKSIZE + always. + + @param inputLength + The length of the data which will be compressed by + CBZip2OutputStream.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + == 1.]]> + + + + + == 9.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If you are ever unlucky/improbable enough to get a stack overflow whilst + sorting, increase the following constant and try again. In practice I + have never seen the stack go above 27 elems, so the following limit seems + very generous. +

    ]]> +
    +
    + + + The compression requires large amounts of memory. Thus you should call the + {@link #close() close()} method as soon as possible, to force + CBZip2OutputStream to release the allocated memory. +

    + +

    + You can shrink the amount of allocated memory and maybe raise the compression + speed by choosing a lower blocksize, which in turn may cause a lower + compression ratio. You can avoid unnecessary memory allocation by avoiding + using a blocksize which is bigger than the size of the input. +

    + +

    + You can compute the memory usage for compressing by the following formula: +

    + +
    + <code>400k + (9 * blocksize)</code>.
    + 
    + +

    + To get the memory required for decompression by {@link CBZip2InputStream + CBZip2InputStream} use +

    + +
    + <code>65k + (5 * blocksize)</code>.
    + 
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Memory usage by blocksize

  Blocksize   Compression memory usage   Decompression memory usage
  100k        1300k                      565k
  200k        2200k                      1065k
  300k        3100k                      1565k
  400k        4000k                      2065k
  500k        4900k                      2565k
  600k        5800k                      3065k
  700k        6700k                      3565k
  800k        7600k                      4065k
  900k        8500k                      4565k
    + +

    + For decompression CBZip2InputStream allocates less memory if the + bzipped input is smaller than one block. +

    + +

    + Instances of this class are not threadsafe. +

    + +

    + TODO: Update to BZip2 1.0.1 +

    ]]> +
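A minimal sketch following the contract described above: the caller writes the two magic bytes "BZ" before wrapping the stream, and the block size is given in 100k units (the output path is illustrative; ordinary application code would typically go through a CompressionCodec rather than this class directly):

    import java.io.FileOutputStream;
    import java.io.OutputStream;
    import org.apache.hadoop.io.compress.bzip2.CBZip2OutputStream;

    public class BZip2Example {
      public static void main(String[] args) throws Exception {
        OutputStream raw = new FileOutputStream("/tmp/example.bz2");
        raw.write('B');                  // caller-supplied magic bytes, per the contract above
        raw.write('Z');
        // blockSize 9 => 900k blocks: roughly 8500k to compress, 4565k to decompress
        CBZip2OutputStream out = new CBZip2OutputStream(raw, 9);
        out.write("hello bzip2".getBytes("UTF-8"));
        out.close();                     // releases the large internal buffers promptly
      }
    }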
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-zlib is loaded & initialized + and can be loaded for this job, else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • "none" - No compression. +
  • "lzo" - LZO compression. +
  • "gz" - GZIP compression. + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Block Compression. +
  • Named meta data blocks. +
  • Sorted or unsorted keys. +
  • Seek by key or by file offset. + + The memory footprint of a TFile includes the following: +
      +
    • Some constant overhead of reading or writing a compressed block. +
        +
      • Each compressed block requires one compression/decompression codec for + I/O. +
      • Temporary space to buffer the key. +
      • Temporary space to buffer the value (for TFile.Writer only). Values are + chunk encoded, so that we buffer at most one chunk of user data. By default, + the chunk buffer is 1MB. Reading chunked value does not require additional + memory. +
      +
    • TFile index, which is proportional to the total number of Data Blocks. + The total amount of memory needed to hold the index can be estimated as + (56+AvgKeySize)*NumBlocks. +
• MetaBlock index, which is proportional to the total number of Meta + Blocks. The total amount of memory needed to hold the index for Meta Blocks + can be estimated as (40+AvgMetaBlockName)*NumMetaBlock. +
    +

    + The behavior of TFile can be customized by the following variables through + Configuration: +

      +
    • tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default + to 1MB. Values of the length less than the chunk size is guaranteed to have + known value length in read time (See + {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}). +
    • tfile.fs.output.buffer.size: Buffer size used for + FSDataOutputStream. Integer (in bytes). Default to 256KB. +
    • tfile.fs.input.buffer.size: Buffer size used for + FSDataInputStream. Integer (in bytes). Default to 256KB. +
    +

    + Suggestions on performance optimization. +

      +
    • Minimum block size. We recommend a setting of minimum block size between + 256KB to 1MB for general usage. Larger block size is preferred if files are + primarily for sequential access. However, it would lead to inefficient random + access (because there are more data to decompress). Smaller blocks are good + for random access, but require more memory to hold the block index, and may + be slower to create (because we must flush the compressor stream at the + conclusion of each data block, which leads to an FS I/O flush). Further, due + to the internal caching in Compression codec, the smallest possible block + size would be around 20KB-30KB. +
    • The current implementation does not offer true multi-threading for + reading. The implementation uses FSDataInputStream seek()+read(), which is + shown to be much faster than positioned-read call in single thread mode. + However, it also means that if multiple threads attempt to access the same + TFile (using multiple scanners) simultaneously, the actual I/O is carried out + sequentially even if they access different DFS blocks. +
• Compression codec. Use "none" if the data is not very compressible (by + compressible, I mean a compression ratio of at least 2:1). Generally, use "lzo" + as the starting point for experimenting. "gz" offers a slightly better + compression ratio than "lzo" but requires 4x CPU to compress and 2x CPU to + decompress, compared to "lzo". +
    • File system buffering, if the underlying FSDataInputStream and + FSDataOutputStream is already adequately buffered; or if applications + reads/writes keys and values in large buffers, we can reduce the sizes of + input/output buffering in TFile layer by setting the configuration parameters + "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size". +
    + + Some design rationale behind TFile can be found at Hadoop-3315.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + entry of the TFile. + @param endKey + End key of the scan. If null, scan up to the last entry + of the TFile. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Use {@link Scanner#atEnd()} to test whether the cursor is at the end + location of the scanner. +
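A hedged sketch of writing a small TFile and scanning it back, using the local filesystem and the "gz"/"memcmp" options named above; paths, keys and values are placeholders:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.file.tfile.TFile;

    public class TFileExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        Path path = new Path("/tmp/example.tfile");

        FSDataOutputStream fsdos = fs.create(path);
        TFile.Writer writer =
            new TFile.Writer(fsdos, 256 * 1024, "gz", "memcmp", conf); // 256KB min block size
        writer.append("k1".getBytes("UTF-8"), "v1".getBytes("UTF-8")); // keys appended in sorted order
        writer.append("k2".getBytes("UTF-8"), "v2".getBytes("UTF-8"));
        writer.close();
        fsdos.close();

        FSDataInputStream fsdis = fs.open(path);
        long length = fs.getFileStatus(path).getLen();
        TFile.Reader reader = new TFile.Reader(fsdis, length, conf);
        TFile.Reader.Scanner scanner = reader.createScanner();
        int entries = 0;
        while (!scanner.atEnd()) {   // cursor-based scan, as described above
          entries++;
          scanner.advance();
        }
        System.out.println(entries + " entries");
        scanner.close();
        reader.close();
        fsdis.close();
      }
    }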

    + Use {@link Scanner#advance()} to move the cursor to the next key-value + pair (or end if none exists). Use seekTo methods ( + {@link Scanner#seekTo(byte[])} or + {@link Scanner#seekTo(byte[], int, int)}) to seek to any arbitrary + location in the covered range (including backward seeking). Use + {@link Scanner#rewind()} to seek back to the beginning of the scanner. + Use {@link Scanner#seekToEnd()} to seek to the end of the scanner. +

    + Actual keys and values may be obtained through {@link Scanner.Entry} + object, which is obtained through {@link Scanner#entry()}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • Algorithmic comparator: binary comparators that is language + independent. Currently, only "memcmp" is supported. +
  • Language-specific comparator: binary comparators that can + only be constructed in specific language. For Java, the syntax + is "jclass:", followed by the class name of the RawComparator. + Currently, we only support RawComparators that can be + constructed through the default constructor (with no + parameters). Parameterized RawComparators such as + {@link WritableComparator} or + {@link JavaSerializationComparator} may not be directly used. + One should write a wrapper class that inherits from such classes + and use its default constructor to perform proper + initialization. + + @param conf + The configuration object. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + If an exception is thrown, the TFile will be in an inconsistent + state. The only legitimate call after that would be close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Utils#writeVLong(out, n). + + @param out + output stream + @param n + The integer to be encoded + @throws IOException + @see Utils#writeVLong(DataOutput, long)]]> + + + + + + + + +
  • if n in [-32, 127): encode in one byte with the actual value. + Otherwise, +
  • if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52; + byte[1]=n&0xff. Otherwise, +
  • if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 - + 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise, +
  • if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112; + byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise: +
  • if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] = + (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff; +
  • if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] = + (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff; + byte[4]=(n>>8)&0xff; byte[5]=n&0xff +
  • if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] = + (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff; + byte[4]=(n>>16)&0xff; byte[5]=(n>>8)&0xff; byte[6]=n&0xff; +
  • if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] = + (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff; + byte[4]=(n>>24)&0xff; byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff; + byte[7]=n&0xff; +
  • if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] = + (n>>54)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff; + byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; + byte[7]=(n>>8)&0xff; byte[8]=n&0xff; + + + @param out + output stream + @param n + the integer number + @throws IOException]]> + + + + + + + (int)Utils#readVLong(in). + + @param in + input stream + @return the decoded integer + @throws IOException + + @see Utils#readVLong(DataInput)]]> + + + + + + + +
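+ To make the encoding table above concrete, here is a hedged round-trip sketch with Utils.writeVLong and Utils.readVLong (the stream setup and sample values are illustrative assumptions):
+
+     // Sketch: write longs with the variable-length encoding, then read them back.
+     // Uses org.apache.hadoop.io.file.tfile.Utils and java.io streams.
+     ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+     DataOutputStream out = new DataOutputStream(bytes);
+     Utils.writeVLong(out, 100L);       // in [-32, 127): encoded in a single byte
+     Utils.writeVLong(out, 1000000L);   // in [-16*2^16, 16*2^16): encoded in three bytes
+     out.flush();
+
+     DataInputStream in =
+         new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
+     long a = Utils.readVLong(in);      // 100
+     long b = Utils.readVLong(in);      // 1000000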
  • if (FB >= -32), return (long)FB; +
  • if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff; +
  • if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 + + NB[1]&0xff; +
  • if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 + + (NB[1]&0xff)<<8 + NB[2]&0xff; +
  • if (FB in [-128, -121]), return interpret NB[FB+129] as a signed + big-endian integer. + + @param in + input stream + @return the decoded long integer. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Keep trying a limited number of times, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
    + + + + + + + Keep trying for a maximum time, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
    + + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime multiplied by the number of tries so far. +

    ]]> +
    +
    + + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime multiplied by a random + number in the range [0, 2^(number of retries)). +

    ]]> +
    +
    + + + + + + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + + + A retry policy for RemoteException + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + Try once, and fail by re-throwing the exception. + This corresponds to having no retry mechanism in place. +

    ]]> +
    +
    + + + + Try once, and fail silently for void methods, or by + re-throwing the exception for non-void methods. +

    ]]> +
    +
    + + + + Keep trying forever. +

    ]]> +
    +
    + + + A collection of useful implementations of {@link RetryPolicy}. +

    ]]> +
    +
    + + + + + + + + + + Determines whether the framework should retry a + method for the given exception, and the number + of retries that have been made for that operation + so far. +

    + @param e The exception that caused the method to fail. + @param retries The number of times the method has been retried. + @return true if the method should be retried, + false if the method should not be retried + but shouldn't fail with an exception (only for void methods). + @throws Exception The re-thrown exception e indicating + that the method failed and should not be retried further.]]> +
    +
    + + + Specifies a policy for retrying method failures. + Implementations of this interface should be immutable. +

    ]]> +
    +
    + + + + + + + + + + + + Create a proxy for an interface of an implementation class + using the same retry policy for each method in the interface. +

    + @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param retryPolicy the policy for retrying method call failures + @return the retry proxy]]> +
    +
    + + + + + + + Create a proxy for an interface of an implementation class + using a set of retry policies specified by method name. + If no retry policy is defined for a method, then a default of + {@link RetryPolicies#TRY_ONCE_THEN_FAIL} is used. +

    + @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param methodNameToPolicyMap a map of method names to retry policies + @return the retry proxy]]> +
    +
    + + + A factory for creating retry proxies. +

    ]]> +
    +
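+ A hedged sketch of how the retry policies and proxy factory described above fit together; ExampleProtocol, ExampleProtocolImpl, and the sleep settings are assumptions made for illustration:
+
+     // Sketch: wrap an implementation in a retry proxy with per-method policies.
+     // Uses org.apache.hadoop.io.retry.{RetryPolicy, RetryPolicies, RetryProxy},
+     // java.util.{Map, HashMap} and java.util.concurrent.TimeUnit.
+     RetryPolicy policy =
+         RetryPolicies.retryUpToMaximumCountWithFixedSleep(5, 2, TimeUnit.SECONDS);
+
+     Map<String, RetryPolicy> methodPolicies = new HashMap<String, RetryPolicy>();
+     methodPolicies.put("submit", policy);   // other methods default to TRY_ONCE_THEN_FAIL
+
+     ExampleProtocol proxy = (ExampleProtocol) RetryProxy.create(
+         ExampleProtocol.class, new ExampleProtocolImpl(), methodPolicies);
+     proxy.submit();   // failures are retried transparently according to the policy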
    + +
    + + + + + + + + Prepare the deserializer for reading.

    ]]> +
    +
    + + + + + + Deserialize the next object from the underlying input stream. + If the object t is non-null then this deserializer + may set its internal state to the next object read from the input + stream. Otherwise, if the object t is null a new + deserialized object will be created. +

    + @return the deserialized object]]> +
    +
    + + + + Close the underlying input stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for deserializing objects of type from an + {@link InputStream}. +

    + +

    + Deserializers are stateful, but must not buffer the input since + other producers may read from the input between calls to + {@link #deserialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link Deserializer} to deserialize + the objects to be compared so that the standard {@link Comparator} can + be used to compare them. +

    +

    + One may optimize compare-intensive operations by using a custom + implementation of {@link RawComparator} that operates directly + on byte representations. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + Encapsulates a {@link Serializer}/{@link Deserializer} pair. +

    + @param ]]> +
    +
    + + + + + + + Serializations are found by reading the io.serializations + property from conf, which is a comma-delimited list of + classnames. +

    ]]> +
    +
    + + + + + + + + + + + + A factory for {@link Serialization}s. +

    ]]> +
    +
    + + + + + + + + Prepare the serializer for writing.

    ]]> +
    +
    + + + + + Serialize t to the underlying output stream.

    ]]> +
    +
    + + + + Close the underlying output stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for serializing objects of type to an + {@link OutputStream}. +

    + +

    + Serializers are stateful, but must not buffer the output since + other producers may write to the output between calls to + {@link #serialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + +
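+ A hedged sketch of the pluggable serialization framework described above; the use of JavaSerialization for String and the in-memory stream are illustrative assumptions:
+
+     // Sketch: look up a Serializer through the io.serializations property.
+     // Uses org.apache.hadoop.io.serializer.{SerializationFactory, Serializer}
+     // and org.apache.hadoop.conf.Configuration.
+     Configuration conf = new Configuration();
+     conf.set("io.serializations",
+              "org.apache.hadoop.io.serializer.WritableSerialization,"
+            + "org.apache.hadoop.io.serializer.JavaSerialization");
+
+     SerializationFactory factory = new SerializationFactory(conf);
+     Serializer<String> serializer = factory.getSerializer(String.class);
+     ByteArrayOutputStream out = new ByteArrayOutputStream();
+     serializer.open(out);           // prepare the serializer for writing
+     serializer.serialize("hello");  // write one object to the underlying stream
+     serializer.close();             // close the stream and release resources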
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + param, to the IPC server running at + address, returning the value. Throws exceptions if there are + network problems or if the remote code threw an exception. + @deprecated Use {@link #call(Writable, InetSocketAddress, Class, UserGroupInformation)} instead]]> + + + + + + + + + + param, to the IPC server running at + address with the ticket credentials, returning + the value. + Throws exceptions if there are network problems or if the remote code + threw an exception. + @deprecated Use {@link #call(Writable, InetSocketAddress, Class, UserGroupInformation)} instead]]> + + + + + + + + + + + param, to the IPC server running at + address which is servicing the protocol protocol, + with the ticket credentials, returning the value. + Throws exceptions if there are network problems or if the remote code + threw an exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unwraps any IOException. + + @param lookupTypes the desired exception class. + @return IOException, which is either the lookupClass exception or this.]]> + + + + + This unwraps any Throwable that has a constructor taking + a String as a parameter. + Otherwise it returns this. + + @return Throwable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + protocol is a Java interface. All parameters and return types must + be one of: + +
    • a primitive type, boolean, byte, + char, short, int, long, + float, double, or void; or
    • + +
    • a {@link String}; or
    • + +
    • a {@link Writable}; or
    • + +
    • an array of the above types
    + + All methods in the protocol should throw only IOException. No field data of + the protocol instance is transmitted.]]> +
    +
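+ A hedged sketch of a protocol interface that follows the rules above; the interface name, version constant, and methods are assumptions, only the allowed parameter and return types come from the docs:
+
+     // Sketch: an RPC protocol restricted to the permitted types.
+     // Uses org.apache.hadoop.ipc.VersionedProtocol, org.apache.hadoop.io.Writable, java.io.IOException.
+     public interface ExampleProtocol extends VersionedProtocol {
+       long VERSION = 1L;
+
+       // primitive and String parameters/returns are allowed
+       int ping(String message) throws IOException;
+
+       // Writable parameters and return types are allowed
+       Writable echo(Writable value) throws IOException;
+
+       // arrays of the above types are allowed
+       String[] listNames(int limit) throws IOException;
+     }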
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + handlerCount determines + the number of handler threads that will be used to process calls.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + ,name=RpcActivityForPort" + + Many of the activity metrics are sampled and averaged on an interval + which can be specified in the metrics config file. +

    + For the metrics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most metrics contexts do. + The default Null metrics context, however, does NOT. So if you aren't + using any other metrics context, you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-metrics.properties file: +

    +        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        rpc.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically + + + + Impl details: We use a dynamic mbean that gets the list of the metrics + from the metrics registry passed as an argument to the constructor]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #rpcQueueTime}.inc(time)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        rpc.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When constructing the instance, if the factory property + contextName.class exists, + its value is taken to be the name of the class to instantiate. Otherwise, + the default is to create an instance of + org.apache.hadoop.metrics.spi.NullContext, which is a + dummy "no-op" context which will cause all metric data to be discarded. + + @param contextName the name of the context + @return the named MetricsContext]]> + + + + + + + + + + + + + + + + + + + + + When the instance is constructed, this method checks if the file + hadoop-metrics.properties exists on the class path. If it + exists, it must be in the format defined by java.util.Properties, and all + the properties in the file are set as attributes on the newly created + ContextFactory instance. + + @return the singleton ContextFactory instance]]> + + + + getFactory() method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + startMonitoring() again after calling + this. + @see #close()]]> + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A record name identifies the kind of data to be reported. For example, a + program reporting statistics relating to the disks on a computer might use + a record name "diskStats".

    + + A record has zero or more tags. A tag has a name and a value. To + continue the example, the "diskStats" record might use a tag named + "diskName" to identify a particular disk. Sometimes it is useful to have + more than one tag, so there might also be a "diskType" with value "ide" or + "scsi" or whatever.

    + + A record also has zero or more metrics. These are the named + values that are to be reported to the metrics system. In the "diskStats" + example, possible metric names would be "diskPercentFull", "diskPercentBusy", + "kbReadPerSecond", etc.

    + + The general procedure for using a MetricsRecord is to fill in its tag and + metric values, and then call update() to pass the record to the + client library. + Metric data is not immediately sent to the metrics system + each time that update() is called. + An internal table is maintained, identified by the record name. This + table has columns + corresponding to the tag and the metric names, and rows + corresponding to each unique set of tag values. An update + either modifies an existing row in the table, or adds a new row with a set of + tag values that are different from all the other rows. Note that if there + are no tags, then there can be at most one row in the table.

    + + Once a row is added to the table, its data will be sent to the metrics system + on every timer period, whether or not it has been updated since the previous + timer period. If this is inappropriate, for example if metrics were being + reported by some transient object in an application, the remove() + method can be used to remove the row and thus stop the data from being + sent.

    + + Note that the update() method is atomic. This means that it is + safe for different threads to be updating the same metric. More precisely, + it is OK for different threads to call update() on MetricsRecord instances + with the same set of tag names and tag values. Different threads should + not use the same MetricsRecord instance at the same time.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MetricsContext.registerUpdater().]]> + + + + + + + + + + + + + + + + + + + + + + + + + fileName attribute, + if specified. Otherwise the data will be written to standard + output.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is configured by setting ContextFactory attributes which in turn + are usually configured through a properties file. All the attributes are + prefixed by the contextName. For example, the properties file might contain: +

    + myContextName.fileName=/tmp/metrics.log
    + myContextName.period=5
    + 
    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + contextName.tableName. The returned map consists of + those attributes with the contextName and tableName stripped off.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class implements the internal table of metric data, and the timer + on which data is to be sent to the metrics system. Subclasses must + override the abstract emitRecord method in order to transmit + the data.

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + update + and remove().]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + ,name=" + Where the and are the supplied parameters + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.rpc.socket.factory.class.<ClassName>. When no + such parameter exists then fall back on the default socket factory as + configured by hadoop.rpc.socket.factory.class.default. If + this default socket factory is not configured, then fall back on the JVM + default socket factory. + + @param conf the configuration + @param clazz the class (usually a {@link VersionedProtocol}) + @return a socket factory]]> + + + + + + hadoop.rpc.socket.factory.default + + @param conf the configuration + @return the default socket factory as specified in the configuration or + the JVM default socket factory if the configuration does not + contain a default socket factory property.]]> + + + + + + + + + + + + + : + ://:/]]> + + + + + + + + : + ://:/]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
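+ Tying back to the MetricsRecord discussion above, a hedged sketch of the record/tag/metric workflow for the "diskStats" example (the context name, tag, and metric values are illustrative assumptions):
+
+     // Sketch: create a metrics record, tag it, set metric values, and push an update.
+     // Uses org.apache.hadoop.metrics.{MetricsUtil, MetricsContext, MetricsRecord}.
+     MetricsContext context = MetricsUtil.getContext("diskStats");
+     MetricsRecord record = MetricsUtil.createRecord(context, "diskStats");
+     record.setTag("diskName", "sda");           // one table row per unique set of tag values
+     record.setMetric("diskPercentFull", 72);    // named metric values
+     record.setMetric("kbReadPerSecond", 1280);
+     record.update();                            // hand the row to the metrics system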
    + + From documentation for {@link #getInputStream(Socket, long)}:
    + Returns InputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketInputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getInputStream()} is returned. In the latter + case, the timeout argument is ignored and the timeout set with + {@link Socket#setSoTimeout(int)} applies for reads.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see #getInputStream(Socket, long) + + @param socket + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + +
    + + From documentation for {@link #getOutputStream(Socket, long)} :
    + Returns OutputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketOutputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getOutputStream()} is returned. In the latter + case, the timeout argument is ignored and the write will wait until + data is available.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see #getOutputStream(Socket, long) + + @param socket + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    + + + + + + + socket.connect(endpoint, timeout). If + socket.getChannel() returns a non-null channel, + connect is implemented using Hadoop's selectors. This is done mainly + to prevent Sun's connect implementation from creating thread-local + selectors, since Hadoop has no control over when these are closed + and they could end up consuming all the available file descriptors. + + @see java.net.Socket#connect(java.net.SocketAddress, int) + + @param socket + @param endpoint + @param timeout - timeout in milliseconds]]> + + + + + + + + + + + + + + +
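+ A hedged sketch of the NetUtils helpers described above; the host, port, and timeout values are illustrative assumptions:
+
+     // Sketch: connect a socket and obtain timeout-aware streams via NetUtils.
+     // Uses org.apache.hadoop.net.NetUtils, org.apache.hadoop.io.IOUtils and java.net/java.io.
+     Socket socket = new Socket();
+     SocketAddress endpoint = new InetSocketAddress("datanode.example", 50010);
+     NetUtils.connect(socket, endpoint, 15000);                   // 15s connect timeout
+
+     InputStream in = NetUtils.getInputStream(socket, 60000);     // 60s read timeout
+     OutputStream out = NetUtils.getOutputStream(socket, 60000);  // 60s write timeout
+     try {
+       out.write(1);
+       int reply = in.read();
+     } finally {
+       IOUtils.closeStream(in);
+       IOUtils.closeStream(out);
+       IOUtils.closeSocket(socket);
+     }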
    + + + + + + + + + + + + + + + + + + + + + node + + @param node + a node + @return true if node is already in the tree; false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + scope + if scope starts with ~, choose one from the all nodes except for the + ones in scope; otherwise, choose one from scope + @param scope range of nodes from which a node will be choosen + @return the choosen node]]> + + + + + + + scope but not in excludedNodes + if scope starts with ~, return the number of nodes that are not + in scope and excludedNodes; + @param scope a path string that may start with ~ + @param excludedNodes a list of nodes + @return number of available nodes]]> + + + + + + + + + + + + reader + It linearly scans the array, if a local node is found, swap it with + the first element of the array. + If a local rack node is found, swap it with the first element following + the local node. + If neither local node or local rack node is found, put a random replica + location at postion 0. + It leaves the rest nodes untouched.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout timeout in milliseconds. must not be negative. + @throws IOException]]> +
    +
    + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + +
    + + Create a new ouput stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketOutputStream#SocketOutputStream(WritableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout timeout in milliseconds. must not be negative. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = getCount(). + @param newCapacity The new capacity in bytes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index idx = startVector(...); + while (!idx.done()) { + .... // read element of a vector + idx.incr(); + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This task takes the given record definition files and compiles them into + java or c++ + files. It is then up to the user to compile the generated files. + +

    The task requires the file or the nested fileset element to be + specified. Optional attributes are language (set the output + language, default is "java"), + destdir (name of the destination directory for generated java/c++ + code, default is ".") and failonerror (specifies error handling + behavior. default is true). +

    Usage

    +
    + <recordcc
    +       destdir="${basedir}/gensrc"
    +       language="java">
    +   <fileset include="**\/*.jr" />
    + </recordcc>
    + 
    ]]> +
    +
    + +
    + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + Group with the given groupname. + @param group group name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi. + @param ugi user + @return the {@link Subject} for the user identified by ugi]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi as a comma separated string in + conf as a property attr + + The String starts with the user name followed by the default group names, + and other group names. + + @param conf configuration + @param attr property name + @param ugi a UnixUserGroupInformation]]> + + + + + + + + conf + + The object is expected to store with the property name attr + as a comma separated string that starts + with the user name followed by group names. + If the property name is not defined, return null. + It's assumed that there is only one UGI per user. If this user already + has a UGI in the ugi map, return the ugi in the map. + Otherwise, construct a UGI from the configuration, store it in the + ugi map and return it. + + @param conf configuration + @param attr property name + @return a UnixUGI + @throws LoginException if the stored string is ill-formatted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + User with the given username. + @param user user name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). 
+ @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + does not provide the stack trace for security purposes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + service as related to + Service Level Authorization for Hadoop. + + Each service defines it's configuration key and also the necessary + {@link Permission} required to access the service.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + in]]> + + + + + + + out.]]> + + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GenericOptionsParser to parse only the generic Hadoop + arguments. + + The array of string arguments other than the generic arguments can be + obtained by {@link #getRemainingArgs()}. + + @param conf the Configuration to modify. + @param args command-line arguments.]]> + + + + + GenericOptionsParser to parse given options as well + as generic Hadoop options. + + The resulting CommandLine object can be obtained by + {@link #getCommandLine()}. + + @param conf the configuration to modify + @param options options built by the caller + @param args User-specified arguments]]> + + + + + Strings containing the un-parsed arguments + or empty array if commandLine was not defined.]]> + + + + + + + + + + CommandLine object + to process the parsed arguments. + + Note: If the object is created with + {@link #GenericOptionsParser(Configuration, String[])}, then returned + object will only contain parsed generic options. + + @return CommandLine representing list of arguments + parsed against Options descriptor.]]> + + + + + + + + + + + + + + + + + GenericOptionsParser is a utility to parse command line + arguments generic to the Hadoop framework. + + GenericOptionsParser recognizes several standarad command + line arguments, enabling applications to easily specify a namenode, a + jobtracker, additional configuration resources etc. + +

    Generic Options

    + +

    The supported generic options are:

    +

    +     -conf <configuration file>     specify a configuration file
    +     -D <property=value>            use value for given property
    +     -fs <local|namenode:port>      specify a namenode
    +     -jt <local|jobtracker:port>    specify a job tracker
    +     -files <comma separated list of files>    specify comma separated
    +                            files to be copied to the map reduce cluster
    +     -libjars <comma separated list of jars>   specify comma separated
    +                            jar files to include in the classpath.
    +     -archives <comma separated list of archives>    specify comma
    +             separated archives to be unarchived on the compute machines.
    +
    + 

    + +

    The general command line syntax is:

    +

    + bin/hadoop command [genericOptions] [commandOptions]
    + 

    + +

    Generic command line arguments might modify + Configuration objects, given to constructors.

    + +

    The functionality is implemented using Commons CLI.

    + +

    Examples:

    +

    + $ bin/hadoop dfs -fs darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    + 
    + $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    +     
    + $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
    + list /data directory in dfs with conf specified in hadoop-site.xml
    +     
    + $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt local -submit job.xml
    + submit a job to local runner
    + 
    + $ bin/hadoop jar -libjars testlib.jar 
    + -archives test.tgz -files file.txt inputjar args
    + job submission with libjars, files and archives
    + 

    + + @see Tool + @see ToolRunner]]> +
    +
    + + + + + + + + + Class<T>) of the + argument of type T. + @param The type of the argument + @param t the object to get it class + @return Class<T>]]> + + + + + + + List<T> to a an array of + T[]. + @param c the Class object of the items in the list + @param list the list to convert]]> + + + + + + List<T> to a an array of + T[]. + @param list the list to convert + @throws ArrayIndexOutOfBoundsException if the list is empty. + Use {@link #toArray(Class, List)} if the list may be empty.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.file.buffer.size specified in the given + Configuration. + @param in input stream + @param conf configuration + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-hadoop is loaded, + else false]]> + + + + + + true if native hadoop libraries, if present, can be + used for this job; false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { pq.top().change(); pq.adjustTop(); } + instead of
    +  { o = pq.pop(); o.change(); pq.push(o); }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take an insignificant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occured and time-out the operation.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop Pipes + or Hadoop Streaming. + + It also checks to ensure that we are running on a *nix platform else + (e.g. in Cygwin/Windows) it returns null. + @param conf configuration + @return a String[] with the ulimit command arguments or + null if we are running on a non *nix platform or + if the limit is unspecified.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell can be used to run unix commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShellCommandExecutorshould be used in cases where the output + of the command needs no explicit parsing and where the command, working + directory and the environment remains unchanged. The output of the command + is stored as-is and is expected to be small.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ArrayList of string values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the char to be escaped + @return an escaped string]]> + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the escaped char + @return an unescaped string]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyApp.MyMapper.class);
    +         job.setReducerClass(MyApp.MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
    +         JobClient.runJob(job);
    +       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
    +         int res = ToolRunner.run(new Configuration(), new Sort(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> +
    +
    + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bloom filter, as defined by Bloom in 1970. +

    + The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + the networking research community in the past decade thanks to the bandwidth efficiencies that it + offers for the transmission of set membership information between networked hosts. A sender encodes + the information into a bit vector, the Bloom filter, that is more compact than a conventional + representation. Computation and space costs for construction are linear in the number of elements. + The receiver uses the filter to test whether various elements are members of the set. Though the + filter will occasionally return a false positive, it will never return a false negative. When creating + the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]> + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this counting Bloom filter. +
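+ A hedged sketch of basic usage of the Bloom filter classes documented in this section; the vector size and hash count are illustrative and would normally be derived from the expected number of keys and the target false-positive rate:
+
+     // Sketch: add keys to a BloomFilter and test membership.
+     // Uses org.apache.hadoop.util.bloom.{BloomFilter, Key} and org.apache.hadoop.util.hash.Hash.
+     int vectorSize = 8 * 1024;   // bits in the vector (assumed)
+     int nbHash = 4;              // number of hash functions (assumed)
+     BloomFilter filter = new BloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH);
+
+     filter.add(new Key("alpha".getBytes()));
+     filter.add(new Key("beta".getBytes()));
+
+     boolean present = filter.membershipTest(new Key("alpha".getBytes()));  // true
+     boolean absent  = filter.membershipTest(new Key("gamma".getBytes()));  // false, barring a false positive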

    + Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + @param key The key to remove.]]> + + + + + + + + + + + + key -> count map. +

    NOTE: due to the bucket size of this filter, inserting the same + key more than 15 times will cause an overflow at all filter positions + associated with this key, and it will significantly increase the error + rate for this and other keys. For this reason the filter can only be + used to store small count values 0 <= N << 15. + @param key key to be tested + @return 0 if the key is not present. Otherwise, a positive value v will + be returned such that v == count with probability equal to the + error rate of this filter, and v > count otherwise. + Additionally, if the filter experienced an underflow as a result of + {@link #delete(Key)} operation, the return value may be lower than the + count with the probability of the false negative rate of such + filter.]]> + + + + + + + + + + + + + + + + + + + + + + counting Bloom filter, as defined by Fan et al. in a ToN + 2000 paper. +

    + A counting Bloom filter is an improvement to standard a Bloom filter as it + allows dynamic additions and deletions of set membership information. This + is achieved through the use of a counting vector instead of a bit vector. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Summary cache: a scalable wide-area web cache sharing protocol]]> + + + + + + + + + + + + + + Builds an empty Dynamic Bloom filter. + @param vectorSize The number of bits in the vector. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}). + @param nr The threshold for the maximum number of keys to record in a + dynamic Bloom filter row.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + dynamic Bloom filter, as defined in the INFOCOM 2006 paper. +

    + A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but + each of the s rows is a standard Bloom filter. The creation + process of a DBF is iterative. At the start, the DBF is a 1 * m + bit matrix, i.e., it is composed of a single standard Bloom filter. + It assumes that nr elements are recorded in the + initial bit vector, where nr <= n (n is + the cardinality of the set A to record in the filter). +

    + As the size of A grows during the execution of the application, + several keys must be inserted in the DBF. When inserting a key into the DBF, + one must first get an active Bloom filter in the matrix. A Bloom filter is + active when the number of recorded keys, nr, is + strictly less than the current cardinality of A, n. + If an active Bloom filter is found, the key is inserted and + nr is incremented by one. On the other hand, if there + is no active Bloom filter, a new one is created (i.e., a new row is added to + the matrix) according to the current size of A and the element + is added in this new Bloom filter and the nr value of + this new Bloom filter is set to one. A given key is said to belong to the + DBF if the k positions are set to one in one of the matrix rows. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + + @see Theory and Network Applications of Dynamic Bloom Filters]]> + + + + + + + + + + + this filter. + @param nbHash The number of hash functions to consider. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + + this filter. + @param key The key to add.]]> + + + + + + this filter. + @param key The key to test. + @return boolean True if the specified key belongs to this filter. + False otherwise.]]> + + + + + + this filter and a specified filter. +

    + Invariant: The result is assigned to this filter. + @param filter The filter to AND with.]]> + + + + + + this filter and a specified filter. +

    + Invariant: The result is assigned to this filter. + @param filter The filter to OR with.]]> + + + + + + this filter and a specified filter. +

    + Invariant: The result is assigned to this filter. + @param filter The filter to XOR with.]]> + + + + + this filter. +

    + The result is assigned to this filter.]]> + + + + + + this filter. + @param keys The list of keys.]]> + + + + + + this filter. + @param keys The collection of keys.]]> + + + + + + this filter. + @param keys The array of keys.]]> + + + + + + + + + + + + + this filter.]]> + + + + + + + + + + + + + + + + + + + + A filter is a data structure which aims at offering a lossy summary of a set A. The + key idea is to map entries of A (also called keys) into several positions + in a vector through the use of several hash functions. +

    + Typically, a filter will be implemented as a Bloom filter (or a Bloom filter extension). +

    + It must be extended in order to define the real behavior. + + @see Key The general behavior of a key + @see HashFunction A hash function]]> + + + + + + + + + Builds a hash function that must obey to a given maximum number of returned values and a highest value. + @param maxValue The maximum highest returned value. + @param nbHash The number of resulting hashed values. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + this hash function. A NOOP]]> + + + + + + + + + + + + + + + + + + + + + + + + + Builds a key with a default weight. + @param value The byte value of this key.]]> + + + + + + Builds a key with a specified weight. + @param value The value of this key. + @param weight The weight associated to this key.]]> + + + + + + + + + + + + this key.]]> + + + + + this key.]]> + + + + + + this key with a specified value. + @param weight The increment.]]> + + + + + this key by one.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The idea is to randomly select a bit to reset.]]> + + + + + + The idea is to select the bit to reset that will generate the minimum + number of false negative.]]> + + + + + + The idea is to select the bit to reset that will remove the maximum number + of false positive.]]> + + + + + + The idea is to select the bit to reset that will, at the same time, remove + the maximum number of false positve while minimizing the amount of false + negative generated.]]> + + + + + Originally created by + European Commission One-Lab Project 034819.]]> + + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this retouched Bloom filter. +

    + Invariant: if the false positive is null, nothing happens. + @param key The false positive key to add.]]> + + + + + + this retouched Bloom filter. + @param coll The collection of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The list of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The array of false positive.]]> + + + + + + + this retouched Bloom filter. + @param scheme The selective clearing scheme to apply.]]> + + + + + + + + + + + + retouched Bloom filter, as defined in the CoNEXT 2006 paper. +

    + It allows the removal of selected false positives at the cost of introducing + random false negatives, and with the benefit of eliminating some random false + positives at the same time. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + @see RemoveScheme The different selective clearing algorithms + + @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + length, and + the provided seed value + @param bytes input bytes + @param length length of the valid bytes to consider + @param initval seed value + @return hash value]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The best hash table sizes are powers of 2. There is no need to do mod + a prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask. + For example, if you need only 10 bits, do + h = (h & hashmask(10)); + In which case, the hash table should have hashsize(10) elements. + +

    If you are hashing n strings byte[][] k, do it like this: + for (int i = 0, h = 0; i < n; ++i) h = hash( k[i], h); + +

    By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this + code any way you wish, private, educational, or commercial. It's free. + +

    Use for hash table lookup, or anything where one collision in 2^^32 is + acceptable. Do NOT use for cryptographic purposes.]]> + + + + + + + + + + + lookup3.c, by Bob Jenkins, May 2006, Public Domain. + + You can use this free for any purpose. It's in the public domain. + It has no warranty. + + + @see lookup3.c + @see Hash Functions (and how this + function compares to others such as CRC, MD?, etc + @see Has update on the + Dr. Dobbs Article]]> + + + + + + + + + + + + + + + + The C version of MurmurHash 2.0 found at that site was ported + to Java by Andrzej Bialecki (ab at getopt org).

    ]]> +
    +
    + +
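+ For orientation, a hedged sketch (not taken from this distribution's docs) of how the filter and hash classes described above are typically combined; the classes are assumed to come from org.apache.hadoop.util.bloom and org.apache.hadoop.util.hash, and the vector size and hash count are illustrative choices only:
+     // assumes org.apache.hadoop.util.bloom.{BloomFilter, Key}
+     // and org.apache.hadoop.util.hash.Hash from this release
+
+     // 1024 bits and 3 hash functions are example sizing values.
+     BloomFilter filter = new BloomFilter(1024, 3, Hash.MURMUR_HASH);
+
+     // Add a key and test membership; false positives are possible,
+     // false negatives are not (for the plain BloomFilter).
+     filter.add(new Key("alpha".getBytes()));
+     boolean maybePresent = filter.membershipTest(new Key("alpha".getBytes()));
+
+     // The hash functions can also be used on their own.
+     int h = Hash.getInstance(Hash.MURMUR_HASH).hash("alpha".getBytes(), 0);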
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker, + as {@link JobTracker.State} + + @return the current state of the JobTracker.]]> + + + + + JobTracker + + @return the size of heap memory used by the JobTracker]]> + + + + + JobTracker + + @return the configured size of max heap memory that can be used by the JobTracker]]> + + + + + + + + + + + + ClusterStatus provides clients with information such as: +
      +
    1. + Size of the cluster. +
    2. +
    3. + Name of the trackers. +
    4. +
    5. + Task capacity of the cluster. +
    6. +
    7. + The number of currently running map & reduce tasks. +
    8. +
    9. + State of the JobTracker. +
    10. +

    + +

    Clients can query for the latest ClusterStatus, via + {@link JobClient#getClusterStatus()}.

    + + @see JobClient]]> +
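+ As a hedged illustration (a sketch, not part of the original docs), the latest ClusterStatus can be pulled through a JobClient built from a JobConf that is assumed to point at the cluster:
+     // assumes org.apache.hadoop.mapred.{ClusterStatus, JobClient, JobConf}
+     JobConf conf = new JobConf();
+     JobClient client = new JobClient(conf);          // may throw IOException
+
+     // Snapshot of the cluster as described above.
+     ClusterStatus status = client.getClusterStatus();
+     System.out.println("trackers: " + status.getTaskTrackers());
+     System.out.println("running maps: " + status.getMapTasks()
+         + " / capacity " + status.getMaxMapTasks());
+     System.out.println("running reduces: " + status.getReduceTasks()
+         + " / capacity " + status.getMaxReduceTasks());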
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter can be of + any {@link Enum} type.

    + +

    Counters are bunched into {@link Group}s, each comprising of + counters from a particular Enum class. + @deprecated Use {@link org.apache.hadoop.mapreduce.Counters} instead.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Group of counters, comprising of counters from a particular + counter {@link Enum} class. + +

Group handles localization of the class name and the + counter names.

    ]]> +
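+ Counters are usually incremented from within a task via the Reporter and read back on the client; a hedged sketch (the enum name is invented for illustration and the exception handling is omitted):
+     // Illustrative counter enum; any application Enum works as a key.
+     public static enum MyCounters { BAD_RECORDS }
+
+     // Inside map()/reduce() of the old API, using the Reporter argument:
+     //   reporter.incrCounter(MyCounters.BAD_RECORDS, 1);
+
+     // From the client, once the job has finished:
+     RunningJob running = JobClient.runJob(job);
+     long bad = running.getCounters().getCounter(MyCounters.BAD_RECORDS);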
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param fs the file system that the file is on + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobConf, int)}. + Subclasses of FileInputFormat can also override the + {@link #isSplitable(FileSystem, Path)} method to ensure input-files are + not split-up and are processed as a whole by {@link Mapper}s. + @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat} + instead.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

Note: The following is valid only if the {@link OutputCommitter} + is {@link FileOutputCommitter}. If OutputCommitter is not + a FileOutputCommitter, the task's temporary output + directory is the same as {@link #getOutputPath(JobConf)} i.e. + ${mapred.output.dir}

    + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapred.output.dir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapred.output.dir}/_temporary/_${taskid} (only) + are promoted to ${mapred.output.dir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

The application-writer can take advantage of this by creating any + side-files required in ${mapred.work.output.dir} during execution + of the reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the + framework will move them out similarly - thus they don't have to pick + unique paths per task-attempt.

    + +

    Note: the value of ${mapred.work.output.dir} during + execution of a particular task-attempt is actually + ${mapred.output.dir}/_temporary/_{$taskid}, and this value is + set by the map-reduce framework. So, just create any side-files in the + path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce + task to take advantage of this feature.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
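+ A hedged sketch of the side-file mechanism described above (the file name is arbitrary); the task writes under the returned work path and the framework promotes the file with the rest of the task output:
+     // assumes org.apache.hadoop.fs.{FileSystem, FSDataOutputStream, Path}
+     // and org.apache.hadoop.mapred.FileOutputFormat; "job" is the task's JobConf
+     Path workDir = FileOutputFormat.getWorkOutputPath(job);
+     Path sideFile = new Path(workDir, "side-data.txt");   // illustrative name
+
+     FileSystem fs = sideFile.getFileSystem(job);
+     FSDataOutputStream out = fs.create(sideFile);
+     out.writeBytes("extra output promoted along with the task's real output\n");
+     out.close();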
    +
    + + + + + + + + + + + + + The generated name can be used to create custom files from within the + different tasks for the job, the names for different tasks will not collide + with each other.

    + +

The given name is postfixed with the task type, 'm' for maps, 'r' for + reduces and the task partition number. For example, given a name 'test' + running on the first map of the job, the generated name will be + 'test-m-00000'.

+ + @param conf the configuration for the job. + @param name the name to make unique. + @return a unique name across all tasks of the job.]]> +
    +
    + + + + + The path can be used to create custom files from within the map and + reduce tasks. The path name will be unique for each task. The path parent + will be the job output directory.

+ +

    This method uses the {@link #getUniqueName} method to make the file name + unique for the task.

+ + @param conf the configuration for the job. + @param name the name for the file. + @return a unique path across all tasks of the job.]]> +
    +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

    Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For e.g. a split could + be <input-file-path, start, offset> tuple. + + @param job job configuration. + @param numSplits the desired number of splits, a hint. + @return an array of {@link InputSplit}s for the job.]]> + + + + + + + + + It is the responsibility of the RecordReader to respect + record boundaries while processing the logical split to present a + record-oriented view to the individual task.

    + + @param split the {@link InputSplit} + @param job the job that this split belongs to + @return a {@link RecordReader}]]> +
    +
    + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
    1. + Validate the input-specification of the job. +
    2. + Split-up the input file(s) into logical {@link InputSplit}s, each of + which is then assigned to an individual {@link Mapper}. +
    3. +
    4. + Provide the {@link RecordReader} implementation to be used to glean + input records from the logical InputSplit for processing by + the {@link Mapper}. +
    5. +
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

Clearly, logical splits based on input-size are insufficient for many + applications since record boundaries are to be respected. In such cases, the + application has to also implement a {@link RecordReader} on whom lies the + responsibility to respect record-boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see JobClient + @see FileInputFormat + @deprecated Use {@link org.apache.hadoop.mapreduce.InputFormat} instead.]]> + + + + + + + + + + InputSplit. + + @return the number of bytes in the input split. + @throws IOException]]> + + + + + + InputSplit is + located as an array of Strings. + @throws IOException]]> + + + + InputSplit represents the data to be processed by an + individual {@link Mapper}. +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader + @deprecated Use {@link org.apache.hadoop.mapreduce.InputSplit} instead.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + jobid doesn't correspond to any known job. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient is the primary interface for the user-job to interact + with the {@link JobTracker}. + + JobClient provides facilities to submit jobs, track their + progress, access component-tasks' reports/logs, get the Map-Reduce cluster + status information etc. + +

    The job submission process involves: +

      +
    1. + Checking the input and output specifications of the job. +
    2. +
    3. + Computing the {@link InputSplit}s for the job. +
    4. +
    5. + Setup the requisite accounting information for the {@link DistributedCache} + of the job, if necessary. +
    6. +
    7. + Copying the job's jar and configuration to the map-reduce system directory + on the distributed file-system. +
    8. +
    9. + Submitting the job to the JobTracker and optionally monitoring + it's status. +
    10. +

    + + Normally the user creates the application, describes various facets of the + job via {@link JobConf} and then uses the JobClient to submit + the job and monitor its progress. + +

    Here is an example on how to use JobClient:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     job.setInputPath(new Path("in"));
    +     job.setOutputPath(new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +
    +     // Submit the job, then poll for progress until the job is complete
    +     JobClient.runJob(job);
    + 

    + +

    Job Control

    + +

    At times clients would chain map-reduce jobs to accomplish complex tasks + which cannot be done via a single map-reduce job. This is fairly easy since + the output of the job, typically, goes to distributed file-system and that + can be used as the input for the next job.

    + +

    However, this also means that the onus on ensuring jobs are complete + (success/failure) lies squarely on the clients. In such situations the + various job-control options are: +

      +
    1. + {@link #runJob(JobConf)} : submits the job and returns only after + the job has completed. +
    2. +
    3. + {@link #submitJob(JobConf)} : only submits the job, then poll the + returned handle to the {@link RunningJob} to query status and make + scheduling decisions. +
    4. +
    5. + {@link JobConf#setJobEndNotificationURI(String)} : setup a notification + on job-completion, thus avoiding polling. +
    6. +

    + + @see JobConf + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
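+ A hedged sketch of the second job-control option listed above (submit the job, then poll the returned RunningJob handle yourself); the polling interval is arbitrary and exception handling is omitted:
+     JobClient client = new JobClient(job);
+
+     // Submit without blocking, then poll the handle for progress.
+     RunningJob running = client.submitJob(job);
+     while (!running.isComplete()) {
+       Thread.sleep(5000);                      // arbitrary polling interval
+       System.out.println("map " + running.mapProgress()
+           + " reduce " + running.reduceProgress());
+     }
+     if (!running.isSuccessful()) {
+       System.err.println("Job " + running.getID() + " failed");
+     }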
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If the parameter {@code loadDefaults} is false, the new instance + will not load resources from the default files. + + @param loadDefaults specifies whether to load from the default files]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if framework should keep the intermediate files + for failed tasks, false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the outputs of the maps are to be compressed, + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This comparator should be provided if the equivalence rules for keys + for sorting the intermediates are different from those for grouping keys + before each call to + {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.

    + +

    For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed + in a single call to the reduce function if K1 and K2 compare as equal.

    + +

    Since {@link #setOutputKeyComparatorClass(Class)} can be used to control + how keys are sorted, this can be used in conjunction to simulate + secondary sort on values.

    + +

    Note: This is not a guarantee of the reduce sort being + stable in any sense. (In any case, with the order of available + map-outputs to the reduce being non-deterministic, it wouldn't make + that much sense.)

    + + @param theClass the comparator class to be used for grouping keys. + It should implement RawComparator. + @see #setOutputKeyComparatorClass(Class)]]> +
    +
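+ A hedged configuration sketch of the secondary-sort arrangement described above; the two comparator classes are hypothetical application classes, not part of Hadoop:
+     // Sort map outputs by the full composite key...
+     job.setOutputKeyComparatorClass(FullKeyComparator.class);          // hypothetical
+     // ...but group values for the reduce by the natural key only.
+     job.setOutputValueGroupingComparator(NaturalKeyComparator.class);  // hypothetical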
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. Typically the combiner is same as the + the {@link Reducer} for the job i.e. {@link #getReducerClass()}. + + @return the user-defined combiner class used to combine map-outputs.]]> + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. + +

    The combiner is an application-specified aggregation operation, which + can help cut down the amount of data transferred between the + {@link Mapper} and the {@link Reducer}, leading to better performance.

    + +

    The framework may invoke the combiner 0, 1, or multiple times, in both + the mapper and reducer tasks. In general, the combiner is called as the + sort/merge result is written to disk. The combiner must: +

      +
    • be side-effect free
    • +
    • have the same input and output key types and the same input and + output value types
    • +

    + +

Typically the combiner is the same as the Reducer for the + job i.e. {@link #setReducerClass(Class)}.

    + + @param theClass the user-defined combiner class used to combine + map-outputs.]]> +
    +
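+ For example, a hedged sketch that reuses the reducer as the combiner, which is only safe when the reduce operation is associative and commutative (e.g. summing counts); the mapper/reducer class names are hypothetical:
+     // Word-count style job: the reducer just sums counts, so it can
+     // double as the combiner without changing the result.
+     job.setMapperClass(WordCountMapper.class);       // hypothetical
+     job.setCombinerClass(WordCountReducer.class);    // hypothetical
+     job.setReducerClass(WordCountReducer.class);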
    + + + true. + + @return true if speculative execution be used for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on, else false.]]> + + + + + true. + + @return true if speculative execution be + used for this job for map tasks, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for map tasks, + else false.]]> + + + + + true. + + @return true if speculative execution be used + for reduce tasks for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for reduce tasks, + else false.]]> + + + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + Note: This is only a hint to the framework. The actual + number of spawned map tasks depends on the number of {@link InputSplit}s + generated by the job's {@link InputFormat#getSplits(JobConf, int)}. + + A custom {@link InputFormat} is typically used to accurately control + the number of map tasks for the job.

    + +

    How many maps?

    + +

    The number of maps is usually driven by the total size of the inputs + i.e. total number of blocks of the input files.

    + +

The right level of parallelism for maps seems to be around 10-100 maps + per-node, although it has been set up to 300 or so for very cpu-light map + tasks. Task setup takes a while, so it is best if the maps take at least a + minute to execute.

    + +

    The default behavior of file-based {@link InputFormat}s is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of input files. However, the {@link FileSystem} blocksize of the + input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

    Thus, if you expect 10TB of input data and have a blocksize of 128MB, + you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is + used to set it even higher.

    + + @param n the number of map tasks for this job. + @see InputFormat#getSplits(JobConf, int) + @see FileInputFormat + @see FileSystem#getDefaultBlockSize() + @see FileStatus#getBlockSize()]]> +
    +
    + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + How many reduces? + +

    The right number of reduces seems to be 0.95 or + 1.75 multiplied by (<no. of nodes> * + + mapred.tasktracker.reduce.tasks.maximum). +

    + +

With 0.95 all of the reduces can launch immediately and + start transferring map outputs as the maps finish. With 1.75 + the faster nodes will finish their first round of reduces and launch a + second wave of reduces doing a much better job of load balancing.

    + +

    Increasing the number of reduces increases the framework overhead, but + increases load balancing and lowers the cost of failures.

    + +

    The scaling factors above are slightly less than whole numbers to + reserve a few reduce slots in the framework for speculative-tasks, failures + etc.

    + +

    Reducer NONE

    + +

    It is legal to set the number of reduce-tasks to zero.

    + +

In this case the output of the map-tasks goes directly to the distributed + file-system, to the path set by + {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the + framework doesn't sort the map-outputs before writing them out to HDFS.

    + + @param n the number of reduce tasks for this job.]]> +
    +
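+ Applying the rule of thumb above, a hedged sketch; the node count and per-node reduce capacity are illustrative values an application would normally obtain from the cluster rather than hard-code:
+     int nodes = 20;                 // illustrative cluster size
+     int reduceSlotsPerNode = 2;     // mapred.tasktracker.reduce.tasks.maximum
+     int reduces = (int) (0.95 * nodes * reduceSlotsPerNode);   // 38
+     job.setNumReduceTasks(reduces);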
    + + + mapred.map.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per map task.]]> + + + + + + + + + + + mapred.reduce.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per reduce task.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + noFailures, the + tasktracker is blacklisted for this job. + + @param noFailures maximum no. of failures of a given job per tasktracker.]]> + + + + + blacklisted for this job. + + @return the maximum no. of failures of a given job per tasktracker.]]> + + + + + failed. + + Defaults to zero, i.e. any failed map-task results in + the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + failed. + + Defaults to zero, i.e. any failed reduce-task results + in the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The debug script can aid debugging of failed map tasks. The script is + given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the map failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setMapDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param mDbgScript the script name]]> +
    +
    + + + + + + + + + The debug script can aid debugging of failed reduce tasks. The script + is given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

The debug command, run on the node where the reduce failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

The script file is distributed through {@link DistributedCache} + APIs. The script file needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setReduceDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param rDbgScript the script name]]> +
    +
    + + + + + + + + null if it hasn't + been set. + @see #setJobEndNotificationURI(String)]]> + + + + + + The uri can contain 2 special parameters: $jobId and + $jobStatus. Those, if present, are replaced by the job's + identifier and completion-status respectively.

    + +

    This is typically used by application-writers to implement chaining of + Map-Reduce jobs in an asynchronous manner.

    + + @param uri the job end notification uri + @see JobStatus + @see Job Completion and Chaining]]> +
    +
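+ A hedged example of such a notification URI (the host and path are placeholders; $jobId and $jobStatus are substituted by the framework as described above):
+     // The listener at this placeholder endpoint receives the final status.
+     job.setJobEndNotificationURI(
+         "http://workflow.example.com/notify?jobid=$jobId&status=$jobStatus");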
    + + + + When a job starts, a shared directory is created at location + + ${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ . + This directory is exposed to the users through + job.local.dir . + So, the tasks can use this space + as scratch space and share files among them.

    + This value is available as System property also. + + @return The localized job specific shared directory]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + mapred.task.maxvmem is split into + mapred.job.map.memory.mb + and mapred.job.map.memory.mb,mapred + each of the new key are set + as mapred.task.maxvmem / 1024 + as new values are in MB + + @return The maximum amount of memory any task of this job will use, in + bytes. + @see #setMaxVirtualMemoryForTask(long) + @deprecated Use {@link #getMemoryForMapTask()} and + {@link #getMemoryForReduceTask()}]]> + + + + + + + mapred.task.maxvmem is split into + mapred.job.map.memory.mb + and mapred.job.map.memory.mb,mapred + each of the new key are set + as mapred.task.maxvmem / 1024 + as new values are in MB + + @param vmem Maximum amount of virtual memory in bytes any task of this job + can use. + @see #getMaxVirtualMemoryForTask() + @deprecated + Use {@link #setMemoryForMapTask(long mem)} and + Use {@link #setMemoryForReduceTask(long mem)}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobConf is the primary interface for a user to describe a + map-reduce job to the Hadoop framework for execution. The framework tries to + faithfully execute the job as-is described by JobConf, however: +
      +
    1. + Some configuration parameters might have been marked as + + final by administrators and hence cannot be altered. +
    2. +
3. + While some job parameters are straight-forward to set + (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly with the + rest of the framework and/or job-configuration and are relatively more + complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}). +
    4. +

    + +

    JobConf typically specifies the {@link Mapper}, combiner + (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and + {@link OutputFormat} implementations to be used etc. + +

Optionally JobConf is used to specify other advanced facets + of the job such as Comparators to be used, files to be put in + the {@link DistributedCache}, whether or not intermediate and/or job outputs + are to be compressed (and how), debuggability via user-provided scripts + ( {@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}), + for doing post-processing on task logs, task's stdout, stderr, syslog, + etc.

    + +

    Here is an example on how to configure a job via JobConf:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     FileInputFormat.setInputPaths(job, new Path("in"));
    +     FileOutputFormat.setOutputPath(job, new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setCombinerClass(MyJob.MyReducer.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +     
    +     job.setInputFormat(SequenceFileInputFormat.class);
    +     job.setOutputFormat(SequenceFileOutputFormat.class);
    + 

    + + @see JobClient + @see ClusterStatus + @see Tool + @see DistributedCache + @deprecated Use {@link Configuration} instead]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + any job + run on the jobtracker started at 200707121733, we would use : +
     
    + JobID.getTaskIDsPattern("200707121733", null);
    + 
    + which will return : +
     "job_200707121733_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @return a regex pattern matching JobIDs]]> +
    +
    + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "N/A" + + @return Scheduling information associated to particular Job Queue]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + zero. + + @param conf configuration for the JobTracker. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Output pairs need not be of the same types as input pairs. A given + input pair may map to zero or many output pairs. Output pairs are + collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the input key. + @param value the input value. + @param output collects mapped keys and values. + @param reporter facility to report progress.]]> +
+ + + Maps are the individual tasks which transform input records into + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link JobConf} for the + job via the {@link JobConfigurable#configure(JobConf)} and initialize + themselves. Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    The framework then calls + {@link #map(Object, Object, OutputCollector, Reporter)} + for each key/value pair in the InputSplit for that task.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the grouping by specifying + a Comparator via + {@link JobConf#setOutputKeyComparatorClass(Class)}.

    + +

    The grouped Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    The intermediate, grouped outputs are always stored in + {@link SequenceFile}s. Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the JobConf.

    + +

    If the job has + zero + reduces then the output of the Mapper is directly written + to the {@link FileSystem} without grouping by keys.

    + +

    Example:

    +

    +     public class MyMapper<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Mapper<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +       
    +       private String mapTaskId;
    +       private String inputFile;
    +       private int noRecords = 0;
    +       
    +       public void configure(JobConf job) {
    +         mapTaskId = job.get("mapred.task.id");
    +         inputFile = job.get("map.input.file");
    +       }
    +       
    +       public void map(K key, V val,
    +                       OutputCollector<K, V> output, Reporter reporter)
    +       throws IOException {
    +         // Process the <key, value> pair (assume this takes a while)
    +         // ...
    +         // ...
    +         
    +         // Let the framework know that we are alive, and kicking!
    +         // reporter.progress();
    +         
    +         // Process some more
    +         // ...
    +         // ...
    +         
    +         // Increment the no. of <key, value> pairs processed
    +         ++noRecords;
    +
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +        
    +         // Every 100 records update application-level status
    +         if ((noRecords%100) == 0) {
    +           reporter.setStatus(mapTaskId + " processed " + noRecords + 
    +                              " from input-file: " + inputFile); 
    +         }
    +         
    +         // Output the result
    +         output.collect(key, val);
    +       }
    +     }
    + 

    + +

    Applications may write a custom {@link MapRunnable} to exert greater + control on map processing e.g. multi-threaded Mappers etc.

    + + @see JobConf + @see InputFormat + @see Partitioner + @see Reducer + @see MapReduceBase + @see MapRunnable + @see SequenceFile + @deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + Provides default no-op implementations for a few methods, most non-trivial + applications need to override some of them.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + <key, value> pairs. + +

    Mapping of input records to output records is complete when this method + returns.

+ + @param input the {@link RecordReader} to read the input records. + @param output the {@link OutputCollector} to collect the output records. + @param reporter {@link Reporter} to report progress, status-updates etc. + @throws IOException]]> +
    +
    + + Custom implementations of MapRunnable can exert greater + control on map processing e.g. multi-threaded, asynchronous mappers etc.

    + + @see Mapper + @deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]> +
    +
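+ A hedged sketch of a minimal MapRunnable that pulls records itself and passes them straight through (single-threaded, similar in spirit to the default runner; the class name is hypothetical and error handling is trimmed):
+     import java.io.IOException;
+     import org.apache.hadoop.mapred.JobConf;
+     import org.apache.hadoop.mapred.MapRunnable;
+     import org.apache.hadoop.mapred.OutputCollector;
+     import org.apache.hadoop.mapred.RecordReader;
+     import org.apache.hadoop.mapred.Reporter;
+
+     public class PassThroughRunner<K, V> implements MapRunnable<K, V, K, V> {
+
+       public void configure(JobConf job) { }
+
+       public void run(RecordReader<K, V> input,
+                       OutputCollector<K, V> output,
+                       Reporter reporter) throws IOException {
+         K key = input.createKey();
+         V value = input.createValue();
+         // Drive the record reader ourselves instead of delegating to MapRunner.
+         while (input.next(key, value)) {
+           output.collect(key, value);
+           reporter.progress();         // keep the framework from timing us out
+         }
+       }
+     }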
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nearly + equal content length.
    + Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} + to construct RecordReader's for MultiFileSplit's. + @see MultiFileSplit + @deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} instead]]> +
    +
    + + + + + + + + + + + + + MultiFileSplit can be used to implement {@link RecordReader}'s, with + reading one record per file. + @see FileSplit + @see MultiFileInputFormat + @deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileSplit} instead]]> + + + + + + + + + + + + + + + <key, value> pairs output by {@link Mapper}s + and {@link Reducer}s. + +

    OutputCollector is the generalization of the facility + provided by the Map-Reduce framework to collect data output by either the + Mapper or the Reducer i.e. intermediate outputs + or the output of the job.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OutputCommitter describes the commit of task output for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputCommitter of + the job to:

    +

      +
    1. + Setup the job during initialization. For example, create the temporary + output directory for the job during the initialization of the job. +
    2. +
    3. + Cleanup the job after the job completion. For example, remove the + temporary output directory after the job completion. +
    4. +
    5. + Setup the task temporary output. +
    6. +
    7. + Check whether a task needs a commit. This is to avoid the commit + procedure if a task does not need commit. +
    8. +
    9. + Commit of the task output. +
    10. +
    11. + Discard the task commit. +
    12. +
    + + @see FileOutputCommitter + @see JobContext + @see TaskAttemptContext + @deprecated Use {@link org.apache.hadoop.mapreduce.OutputCommitter} instead.]]> +
    +
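+ For orientation, a hedged skeleton of the old-API OutputCommitter contract (a do-nothing committer; a real committer such as FileOutputCommitter manages temporary directories in these hooks):
+     import java.io.IOException;
+     import org.apache.hadoop.mapred.JobContext;
+     import org.apache.hadoop.mapred.OutputCommitter;
+     import org.apache.hadoop.mapred.TaskAttemptContext;
+
+     public class NoOpOutputCommitter extends OutputCommitter {
+       public void setupJob(JobContext context) throws IOException { }
+       public void cleanupJob(JobContext context) throws IOException { }
+       public void setupTask(TaskAttemptContext context) throws IOException { }
+       public boolean needsTaskCommit(TaskAttemptContext context)
+           throws IOException {
+         return false;                  // nothing to promote
+       }
+       public void commitTask(TaskAttemptContext context) throws IOException { }
+       public void abortTask(TaskAttemptContext context) throws IOException { }
+     }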
+ + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when it is + submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + @param ignored + @param job job configuration. + @throws IOException when output should not be attempted]]> +
    +
    + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
    1. + Validate the output-specification of the job. For e.g. check that the + output directory doesn't already exist. +
    2. + Provide the {@link RecordWriter} implementation to be used to write out + the output files of the job. Output files are stored in a + {@link FileSystem}. +
    3. +
    + + @see RecordWriter + @see JobConf + @deprecated Use {@link org.apache.hadoop.mapreduce.OutputFormat} instead.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + Typically a hash function on a all or a subset of the key.

+ + @param key the key to be partitioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + + @see Reducer + @deprecated Use {@link org.apache.hadoop.mapreduce.Partitioner} instead.]]> +
    +
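+ A hedged example of the contract: a partitioner that routes on only part of the key (the text before a '#' separator, an arbitrary convention for this sketch; the class name is hypothetical):
+     import org.apache.hadoop.io.Text;
+     import org.apache.hadoop.mapred.JobConf;
+     import org.apache.hadoop.mapred.Partitioner;
+
+     public class PrefixPartitioner implements Partitioner<Text, Text> {
+
+       public void configure(JobConf job) { }
+
+       public int getPartition(Text key, Text value, int numPartitions) {
+         // Partition on the portion of the key before '#', so that related
+         // composite keys land in the same reduce.
+         String natural = key.toString().split("#", 2)[0];
+         return (natural.hashCode() & Integer.MAX_VALUE) % numPartitions;
+       }
+     }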
    + + + + + + + + + + + + + + + + + + + true if there exists a key/value, + false otherwise. + @throws IOException]]> + + + + + + + + + + + + + + + RawKeyValueIterator is an iterator used to iterate over + the raw keys and values during sort/merge of intermediate data.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.0 to 1.0. + @throws IOException]]> + + + + RecordReader reads <key, value> pairs from an + {@link InputSplit}. + +

    RecordReader, typically, converts the byte-oriented view of + the input, provided by the InputSplit, and presents a + record-oriented view for the {@link Mapper} & {@link Reducer} tasks for + processing. It thus assumes the responsibility of processing record + boundaries and presenting the tasks with keys and values.

    + + @see InputSplit + @see InputFormat]]> +
    +
    + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param reporter facility to report progress. + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + Reduces values for a given key. + +

    The framework calls this method for each + <key, (list of values)> pair in the grouped inputs. + Output values must be of the same type as input values. Input keys must + not be altered. The framework will reuse the key and value objects + that are passed into the reduce, therefore the application should clone + the objects they want to keep a copy of. In many cases, all values are + combined into zero or one value. +

    + +

    Output pairs are collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the key. + @param values the list of values to reduce. + @param output to collect keys and combined values. + @param reporter facility to report progress.]]> +
    + + + The number of Reducers for the job is set by the user via + {@link JobConf#setNumReduceTasks(int)}. Reducer implementations + can access the {@link JobConf} for the job via the + {@link JobConfigurable#configure(JobConf)} method and initialize themselves. + Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    Reducer has 3 primary phases:

    +
      +
    1. + +

      Shuffle

      + +

      Reducer is input the grouped output of a {@link Mapper}. + In the phase the framework, for each Reducer, fetches the + relevant partition of the output of all the Mappers, via HTTP. +

      +
    2. + +
    3. +

      Sort

      + +

      The framework groups Reducer inputs by keys + (since different Mappers may have output the same key) in this + stage.

      + +

      The shuffle and sort phases occur simultaneously i.e. while outputs are + being fetched they are merged.

      + +
      SecondarySort
      + +

      If equivalence rules for keys while grouping the intermediates are + different from those for grouping keys before reduction, then one may + specify a Comparator via + {@link JobConf#setOutputValueGroupingComparator(Class)}.Since + {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to + control how intermediate keys are grouped, these can be used in conjunction + to simulate secondary sort on values.

      + + + For example, say that you want to find duplicate web pages and tag them + all with the url of the "best" known example. You would set up the job + like: +
        +
      • Map Input Key: url
      • +
      • Map Input Value: document
      • +
      • Map Output Key: document checksum, url pagerank
      • +
      • Map Output Value: url
      • +
      • Partitioner: by checksum
      • +
      • OutputKeyComparator: by checksum and then decreasing pagerank
      • +
      • OutputValueGroupingComparator: by checksum
      • +
      +
    4. + +
    5. +

      Reduce

      + +

      In this phase the + {@link #reduce(Object, Iterator, OutputCollector, Reporter)} + method is called for each <key, (list of values)> pair in + the grouped inputs.

      +

      The output of the reduce task is typically written to the + {@link FileSystem} via + {@link OutputCollector#collect(Object, Object)}.

      +
    6. +
    + +

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

    +     public class MyReducer<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Reducer<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +        
    +       private String reduceTaskId;
    +       private int noKeys = 0;
    +       
    +       public void configure(JobConf job) {
    +         reduceTaskId = job.get("mapred.task.id");
    +       }
    +       
    +       public void reduce(K key, Iterator<V> values,
    +                          OutputCollector<K, V> output, 
    +                          Reporter reporter)
    +       throws IOException {
    +       
    +         // Process
    +         int noValues = 0;
    +         while (values.hasNext()) {
    +           V value = values.next();
    +           
    +           // Increment the no. of values for this key
    +           ++noValues;
    +           
    +           // Process the <key, value> pair (assume this takes a while)
    +           // ...
    +           // ...
    +           
    +           // Let the framework know that we are alive, and kicking!
    +           if ((noValues%10) == 0) {
    +             reporter.progress();
    +           }
    +         
    +           // Process some more
    +           // ...
    +           // ...
    +           
    +           // Output the <key, value> 
    +           output.collect(key, value);
    +         }
    +         
    +         // Increment the no. of <key, list of values> pairs processed
    +         ++noKeys;
    +         
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +         
    +         // Every 100 keys update application-level status
    +         if ((noKeys%100) == 0) {
    +           reporter.setStatus(reduceTaskId + " processed " + noKeys);
    +         }
    +       }
    +     }
    + 

    + + @see Mapper + @see Partitioner + @see Reporter + @see MapReduceBase + @deprecated Use {@link org.apache.hadoop.mapreduce.Reducer} instead.]]> +
    +
    + + + + + + + + + + + + + + Counter of the given group/name.]]> + + + + + + + Counter of the given group/name.]]> + + + + + + + Enum. + @param amount A non-negative amount by which the counter is to + be incremented.]]> + + + + + + + + + + + + + + InputSplit that the map is reading from. + @throws UnsupportedOperationException if called outside a mapper]]> + + + + + + + + + {@link Mapper} and {@link Reducer} can use the Reporter + provided to report progress or just indicate that they are alive. In + scenarios where the application takes an insignificant amount of time to + process individual key/value pairs, this is crucial since the framework + might assume that the task has timed-out and kill that task. + +

    Applications can also update {@link Counters} via the provided + Reporter .

    + + @see Progressable + @see Counters]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + progress of the job's cleanup-tasks, as a float between 0.0 + and 1.0. When all cleanup tasks have completed, the function returns 1.0. + + @return the progress of the job's cleanup-tasks. + @throws IOException]]> + + + + + + progress of the job's setup-tasks, as a float between 0.0 + and 1.0. When all setup tasks have completed, the function returns 1.0. + + @return the progress of the job's setup-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RunningJob is the user-interface to query for details on a + running Map-Reduce job. + +

    Clients can get hold of RunningJob via the {@link JobClient} + and then query the running-job for details such as name, configuration, + progress etc.

    + + @see JobClient]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This allows the user to specify the key class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + This allows the user to specify the value class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f. The filtering criteria is + MD5(key) % f == 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + f using + the criteria record# % f == 0. + For example, if the frequency is 10, one out of 10 records is returned.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}. + false otherwise.]]> + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}. + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop provides an optional mode of execution in which the bad records + are detected and skipped in further attempts. + +

This feature can be used when map/reduce tasks crash deterministically on + certain input. This happens due to bugs in the map/reduce function. The usual + course would be to fix these bugs. But sometimes this is not possible; + perhaps the bug is in third party libraries for which the source code is + not available. Due to this, the task never reaches completion even with + multiple attempts and complete data for that task is lost.

    + +

    With this feature, only a small portion of data is lost surrounding + the bad record, which may be acceptable for some user applications. + see {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}

    + +

The skipping mode gets kicked off after a certain number of failures; + see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}

    + +

In the skipping mode, the map/reduce task maintains the record range which + is getting processed at all times. Before giving the input to the + map/reduce function, it sends this record range to the Task tracker. + If the task crashes, the Task tracker knows which one was the last reported + range. On further attempts that range gets skipped.

    ]]> +
    +
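+ A hedged configuration sketch using the SkipBadRecords helpers mentioned above (all threshold values and the output path are illustrative):
+     // Start skipping after 2 failed attempts of a task (illustrative value).
+     SkipBadRecords.setAttemptsToStartSkipping(job, 2);
+     // Tolerate losing at most 100 records / 10 key groups around a bad record.
+     SkipBadRecords.setMapperMaxSkipRecords(job, 100);
+     SkipBadRecords.setReducerMaxSkipGroups(job, 10);
+     // Where the skipped ranges are recorded (illustrative path).
+     SkipBadRecords.setSkipOutputPath(job, new Path("_logs/skip"));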
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all task attempt IDs + of any jobtracker, in any job, of the first + map task, we would use : +
     
    + TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
    + 
    + which will return : +
     "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @param attemptId the task attempt number, or null + @return a regex pattern matching TaskAttemptIDs]]> +
    +
    + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the first map task + of any jobtracker, of any job, we would use : +

     
    + TaskID.getTaskIDsPattern(null, null, true, 1);
    + 
    + which will return : +
     "task_[^_]*_[0-9]*_m_000001*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @return a regex pattern matching TaskIDs]]> +
    + + + + + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.log.dir.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the Job was added.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([,]*) + func ::= tbl(,"") + class ::= @see java.lang.Class#forName(java.lang.String) + path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) + } + Reads expression from the mapred.join.expr property and + user-supplied join types from mapred.join.define.<ident> + types. Paths supplied to tbl are given as input paths to the + InputFormat class listed. + @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,

    ) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + mapred.join.define.<ident> to a classname. In the expression + mapred.join.expr, the identifier will be assumed to be a + ComposableRecordReader. + mapred.join.keycomparator can be a classname used to compare keys + in the join. + @see JoinRecordReader + @see MultiFilterRecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + capacity children to position + id in the parent reader. + The id of a root CompositeRecordReader is -1 by convention, but relying + on this is not recommended.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + override(S1,S2,S3) will prefer values + from S3 over S2, and values from S2 over S1 for all keys + emitted from all sources.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [,,...,]]]> + + + + + + + out. + TupleWritable format: + {@code + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

    + For the added Mapper the configuration given for it, + mapperConf, have precedence over the job's JobConf. This + precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain +

    + + @param job job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overriden super.configure(...) should be + invoked at the beginning of the overwriter method.]]> + + + + + + + + + + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overriden super.close() should be + invoked at the end of the overwriter method.]]> + + + + + The Mapper classes are invoked in a chained (or piped) fashion, the output of + the first becomes the input of the second, and so on until the last Mapper, + the output of the last Mapper will be written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed in a chain. This enables having + reusable specialized Mappers that can be combined to perform composite + operations within a single task. +

+ Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reducer in the chain use matching output and input key and + value classes as no conversion is done by the chaining code. +

+ Using the ChainMapper and the ChainReducer classes it is possible to compose + Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An + immediate benefit of this pattern is a dramatic reduction in disk IO. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain. +

    + ChainMapper usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    + 

    + JobConf mapAConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, + Text.class, Text.class, true, mapAConf); +

    + JobConf mapBConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, mapBConf); +

    + JobConf reduceConf = new JobConf(false); + ... + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, + Text.class, Text.class, true, reduceConf); +

    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, null); +

    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, + LongWritable.class, LongWritable.class, true, null); +

    + FileInputFormat.setInputPaths(conf, inDir); + FileOutputFormat.setOutputPath(conf, outDir); + ... +

    + JobClient jc = new JobClient(conf); + RunningJob job = jc.submitJob(conf); + ... +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Reducer leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Reducer does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

    + For the added Reducer the configuration given for it, + reducerConf, have precedence over the job's JobConf. This + precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. + + @param job job's JobConf to add the Reducer class. + @param klass the Reducer class to add. + @param inputKeyClass reducer input key class. + @param inputValueClass reducer input value class. + @param outputKeyClass reducer output key class. + @param outputValueClass reducer output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param reducerConf a JobConf with the configuration for the Reducer + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

    + For the added Mapper the configuration given for it, + mapperConf, have precedence over the job's JobConf. This + precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain + . + + @param job chain job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overriden super.configure(...) should be + invoked at the beginning of the overwriter method.]]> + + + + + + + + + + reduce(...) method of the Reducer with the + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overriden super.close() should be + invoked at the end of the overwriter method.]]> + + + + + For each record output by the Reducer, the Mapper classes are invoked in a + chained (or piped) fashion, the output of the first becomes the input of the + second, and so on until the last Mapper, the output of the last Mapper will + be written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed after the Reducer or in a chain. + This enables having reusable specialized Mappers that can be combined to + perform composite operations within a single task. +

+ Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reducer in the chain use matching output and input key and + value classes as no conversion is done by the chaining code. +

+ Using the ChainMapper and the ChainReducer classes it is possible to compose + Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An + immediate benefit of this pattern is a dramatic reduction in disk IO. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. +

    + ChainReducer usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    + 

    + JobConf mapAConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, + Text.class, Text.class, true, mapAConf); +

    + JobConf mapBConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, mapBConf); +

    + JobConf reduceConf = new JobConf(false); + ... + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, + Text.class, Text.class, true, reduceConf); +

    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, null); +

    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, + LongWritable.class, LongWritable.class, true, null); +

    + FileInputFormat.setInputPaths(conf, inDir); + FileOutputFormat.setOutputPath(conf, outDir); + ... +

    + JobClient jc = new JobClient(conf); + RunningJob job = jc.submitJob(conf); + ... +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RecordReader's for CombineFileSplit's. + @see CombineFileSplit]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + th Path]]> + + + + + + th Path]]> + + + + + + + + + + + th Path]]> + + + + + + + + + + + + + + + + + + + + + + + + + + CombineFileSplit can be used to implement {@link org.apache.hadoop.mapred.RecordReader}'s, + with reading one record per file. + @see org.apache.hadoop.mapred.FileSplit + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + @param freq The frequency with which records will be emitted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + This will read every split at the client, which is very expensive. + @param freq Probability with which a key will be chosen. + @param numSamples Total number of samples to obtain from all selected + splits.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + Takes the first numSamples / numSplits records from each split. + @param numSamples Total number of samples to obtain from all selected + splits.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the name output is multi, false + if it is single. If the name output is not defined it returns + false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @param conf job conf to add the named output + @param namedOutput named output name, it has to be a word, letters + and numbers only, cannot be the word 'part' as + that is reserved for the + default output. + @param outputFormatClass OutputFormat class. + @param keyClass key class + @param valueClass value class]]> + + + + + + + + + + + + @param conf job conf to add the named output + @param namedOutput named output name, it has to be a word, letters + and numbers only, cannot be the word 'part' as + that is reserved for the + default output. + @param outputFormatClass OutputFormat class. + @param keyClass key class + @param valueClass value class]]> + + + + + + + + By default these counters are disabled. +

+ MultipleOutputs supports counters, by default they are disabled. + The counters group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_' and the multiname. + + @param conf job conf in which to enable or disable the counters. + @param enabled indicates if the counters will be enabled or not.]]> +
    +
    + + + + + By default these counters are disabled. +

+ MultipleOutputs supports counters, by default they are disabled. + The counters group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_' and the multiname. + + + @param conf job conf to check the counters setting in. + @return TRUE if the counters are enabled, FALSE if they are disabled.]]> +
    +
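A small sketch of how these two calls might fit together with the old mapred API; the named output "text" and the surrounding job set-up are illustrative only, not taken from this distribution.

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.TextOutputFormat;
    import org.apache.hadoop.mapred.lib.MultipleOutputs;

    public class NamedOutputCounters {
      public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Declare an additional single text output called "text".
        MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
            LongWritable.class, Text.class);
        // Counters for named outputs are off by default; switch them on.
        MultipleOutputs.setCountersEnabled(conf, true);
        // The setting can be read back later, e.g. for logging.
        System.out.println("named output counters enabled: "
            + MultipleOutputs.getCountersEnabled(conf));
      }
    }

With counters enabled, one counter per named output (and, for multi named outputs, one per "name_multiname" combination) is reported under the MultipleOutputs group.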
    + + + + + + + + + + + + + @param namedOutput the named output name + @param reporter the reporter + @return the output collector for the given named output + @throws IOException thrown if output collector could not be created]]> + + + + + + + + + + + @param namedOutput the named output name + @param multiName the multi name part + @param reporter the reporter + @return the output collector for the given named output + @throws IOException thrown if output collector could not be created]]> + + + + + + + If overriden subclasses must invoke super.close() at the + end of their close() + + @throws java.io.IOException thrown if any of the MultipleOutput files + could not be closed properly.]]> + + + + OutputCollector passed to + the map() and reduce() methods of the + Mapper and Reducer implementations. +

    + Each additional output, or named output, may be configured with its own + OutputFormat, with its own key class and with its own value + class. +

+ A named output can be a single file or a multi file. The latter is referred to as + a multi named output. +

    + A multi named output is an unbound set of files all sharing the same + OutputFormat, key class and value class configuration. +

+ When named outputs are used within a Mapper implementation, + key/values written to a named output are not part of the reduce phase; only + key/values written to the job OutputCollector are part of the + reduce phase. +

+ MultipleOutputs supports counters, by default they are disabled. The counters + group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_' and the multiname. +

    + Job configuration usage pattern is: +

    +
    + JobConf conf = new JobConf();
    +
    + conf.setInputPath(inDir);
    + FileOutputFormat.setOutputPath(conf, outDir);
    +
    + conf.setMapperClass(MOMap.class);
    + conf.setReducerClass(MOReduce.class);
    + ...
    +
    + // Defines additional single text based output 'text' for the job
    + MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
    + LongWritable.class, Text.class);
    +
    + // Defines additional multi sequencefile based output 'sequence' for the
    + // job
    + MultipleOutputs.addMultiNamedOutput(conf, "seq",
    +   SequenceFileOutputFormat.class,
    +   LongWritable.class, Text.class);
    + ...
    +
    + JobClient jc = new JobClient();
    + RunningJob job = jc.submitJob(conf);
    +
    + ...
    + 
    +

    + Job configuration usage pattern is: +

    +
    + public class MOReduce implements
    +   Reducer<WritableComparable, Writable> {
    + private MultipleOutputs mos;
    +
    + public void configure(JobConf conf) {
    + ...
    + mos = new MultipleOutputs(conf);
    + }
    +
    + public void reduce(WritableComparable key, Iterator<Writable> values,
    + OutputCollector output, Reporter reporter)
    + throws IOException {
    + ...
    + mos.getCollector("text", reporter).collect(key, new Text("Hello"));
    + mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye"));
    + mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau"));
    + ...
    + }
    +
    + public void close() throws IOException {
    + mos.close();
    + ...
    + }
    +
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used instead of the default implementation, + @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + bound in order to improve throughput. +

    + Map implementations using this MapRunnable must be thread-safe. +

+ The Map-Reduce job has to be configured to use this MapRunnable class (using + the JobConf.setMapRunnerClass method) and + the number of threads the thread-pool can use with the + mapred.map.multithreadedrunner.threads property; its default + value is 10 threads. +
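A minimal configuration sketch for the set-up just described; the thread count of 20 is arbitrary, everything else uses the names quoted above.

    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.lib.MultithreadedMapRunner;

    public class MultithreadedRunnerSetup {
      public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Route map execution through the multithreaded runner.
        conf.setMapRunnerClass(MultithreadedMapRunner.class);
        // Size of the per-task thread pool; 10 is the default if unset.
        conf.setInt("mapred.map.multithreadedrunner.threads", 20);
        // ... the rest of the job set-up and submission would follow here.
      }
    }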

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + pairs. Uses + {@link StringTokenizer} to break text into tokens. + @deprecated Use + {@link org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper} instead.]]> + + + + + + + + + + + + total.order.partitioner.natural.order is not false, a trie + of the first total.order.partitioner.max.trie.depth(2) + 1 bytes + will be built. Otherwise, keys will be located using a binary search of + the partition keyset using the {@link org.apache.hadoop.io.RawComparator} + defined for this job. The input file must be sorted with the same + comparator and contain {@link + org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys.]]> + + + + + + + + + + + + R reduces, there are R-1 + keys in the SequenceFile.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + generateKeyValPairs(Object key, Object value); public void + configure(JobConfjob); } + + The package also provides a base class, ValueAggregatorBaseDescriptor, + implementing the above interface. The user can extend the base class and + implement generateKeyValPairs accordingly. + + The primary work of generateKeyValPairs is to emit one or more key/value + pairs based on the input key/value pair. The key in an output key/value pair + encode two pieces of information: aggregation type and aggregation id. The + value will be aggregated onto the aggregation id according the aggregation + type. + + This class offers a function to generate a map/reduce job using Aggregate + framework. The function takes the following parameters: input directory spec + input format (text or sequence file) output directory a file specifying the + user plugin class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The job can be configured using the static methods in this class, + {@link DBInputFormat}, and {@link DBOutputFormat}. +

    + Alternatively, the properties can be set in the configuration with proper + values. + + @see DBConfiguration#configureDB(JobConf, String, String, String, String) + @see DBInputFormat#setInput(JobConf, Class, String, String) + @see DBInputFormat#setInput(JobConf, Class, String, String, String, String...) + @see DBOutputFormat#setOutput(JobConf, String, String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 20070101 AND length > 0)' + @param orderBy the fieldNames in the orderBy clause. + @param fieldNames The field names in the table + @see #setInput(JobConf, Class, String, String)]]> + + + + + + + + + + + + + + DBInputFormat emits LongWritables containing the record number as + key and DBWritables as value. + + The SQL query, and input class can be using one of the two + setInput methods.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {@link DBOutputFormat} accepts <key,value> pairs, where + key has a type extending DBWritable. Returned {@link RecordWriter} + writes only the key to the database with a batch SQL query.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DBWritable. DBWritable, is similar to {@link Writable} + except that the {@link #write(PreparedStatement)} method takes a + {@link PreparedStatement}, and {@link #readFields(ResultSet)} + takes a {@link ResultSet}. +

    + Implementations are responsible for writing the fields of the object + to PreparedStatement, and reading the fields of the object from the + ResultSet. + +

    Example:

    + If we have the following table in the database : +
    + CREATE TABLE MyTable (
    +   counter        INTEGER NOT NULL,
+   timestamp      BIGINT  NOT NULL
    + );
    + 
    + then we can read/write the tuples from/to the table with : +

    + public class MyWritable implements Writable, DBWritable {
    +   // Some data     
    +   private int counter;
    +   private long timestamp;
    +       
    +   //Writable#write() implementation
    +   public void write(DataOutput out) throws IOException {
    +     out.writeInt(counter);
    +     out.writeLong(timestamp);
    +   }
    +       
    +   //Writable#readFields() implementation
    +   public void readFields(DataInput in) throws IOException {
    +     counter = in.readInt();
    +     timestamp = in.readLong();
    +   }
    +       
    +   public void write(PreparedStatement statement) throws SQLException {
    +     statement.setInt(1, counter);
    +     statement.setLong(2, timestamp);
    +   }
    +       
    +   public void readFields(ResultSet resultSet) throws SQLException {
    +     counter = resultSet.getInt(1);
    +     timestamp = resultSet.getLong(2);
    +   } 
    + }
    + 

    ]]> +
    + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter is named by + an {@link Enum} and has a long for the value.

    + +

    Counters are bunched into Groups, each comprising of + counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.
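As a brief illustration of the Enum-based grouping described above, a task might maintain its own counters along these lines (the enum and the mapper are hypothetical, not classes shipped here):

    import java.io.IOException;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    public class CountingMapper extends Mapper<Object, Text, Text, IntWritable> {

      // The enum class becomes the counter group; each constant is one counter.
      public enum LineStats { TOTAL_LINES, EMPTY_LINES }

      @Override
      protected void map(Object key, Text value, Context context)
          throws IOException, InterruptedException {
        context.getCounter(LineStats.TOTAL_LINES).increment(1);
        if (value.toString().trim().isEmpty()) {
          context.getCounter(LineStats.EMPTY_LINES).increment(1);
        }
        // ... normal map logic would go here ...
      }
    }

After the job completes, the client can read the totals back with job.getCounters().findCounter(CountingMapper.LineStats.EMPTY_LINES).getValue().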

    + +

    Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For e.g. a split could + be <input-file-path, start, offset> tuple. The InputFormat + also creates the {@link RecordReader} to read the {@link InputSplit}. + + @param context job configuration. + @return an array of {@link InputSplit}s for the job.]]> + + + + + + + + + + + + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
    1. + Validate the input-specification of the job. +
    2. + Split-up the input file(s) into logical {@link InputSplit}s, each of + which is then assigned to an individual {@link Mapper}. +
    3. +
    4. + Provide the {@link RecordReader} implementation to be used to glean + input records from the logical InputSplit for processing by + the {@link Mapper}. +
    5. +
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

Clearly, logical splits based on input-size are insufficient for many + applications since record boundaries are to be respected. In such cases, the + application has to also implement a {@link RecordReader} on whom lies the + responsibility to respect record-boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see FileInputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + InputSplit represents the data to be processed by an + individual {@link Mapper}. + +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + InputFormat to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + OutputFormat to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + Mapper to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + + + + + + + + + + + + + + + + + + + Reducer to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + Partitioner to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker is lost]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1. + @return the number of reduce tasks for this job.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID + @see org.apache.hadoop.mapred.JobTracker#getNewJobId() + @see org.apache.hadoop.mapred.JobTracker#getStartTime()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + the key input type to the Mapper + @param the value input type to the Mapper + @param the key output type from the Mapper + @param the value output type from the Mapper]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Maps are the individual tasks which transform input records into a + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link Configuration} for + the job via the {@link JobContext#getConfiguration()}. + +

    The framework first calls + {@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by + {@link #map(Object, Object, Context)} + for each key/value pair in the InputSplit. Finally + {@link #cleanup(Context)} is called.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the sorting and grouping by + specifying two key {@link RawComparator} classes.

    + +

    The Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link Job#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the Configuration.

    + +

    If the job has zero + reduces then the output of the Mapper is directly written + to the {@link OutputFormat} without sorting by keys.

    + +

    Example:

    +

    + public class TokenCounterMapper 
+     extends Mapper<Object, Text, Text, IntWritable>{
    +    
    +   private final static IntWritable one = new IntWritable(1);
    +   private Text word = new Text();
    +   
    +   public void map(Object key, Text value, Context context) throws IOException {
    +     StringTokenizer itr = new StringTokenizer(value.toString());
    +     while (itr.hasMoreTokens()) {
    +       word.set(itr.nextToken());
+       context.write(word, one);
    +     }
    +   }
    + }
    + 

    + +

    Applications may override the {@link #run(Context)} method to exert + greater control on map processing e.g. multi-threaded Mappers + etc.

    + + @see InputFormat + @see JobContext + @see Partitioner + @see Reducer]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OutputCommitter describes the commit of task output for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputCommitter of + the job to:

    +

      +
    1. + Setup the job during initialization. For example, create the temporary + output directory for the job during the initialization of the job. +
    2. +
    3. + Cleanup the job after the job completion. For example, remove the + temporary output directory after the job completion. +
    4. +
    5. + Setup the task temporary output. +
    6. +
    7. + Check whether a task needs a commit. This is to avoid the commit + procedure if a task does not need commit. +
    8. +
    9. + Commit of the task output. +
    10. +
    11. + Discard the task commit. +
    12. +
    + + @see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter + @see JobContext + @see TaskAttemptContext]]> +
    +
    + + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when it is + a job is submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + @param context information about the job + @throws IOException when output should not be attempted]]> +
    +
    + + + + + + + + + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
    1. + Validate the output-specification of the job. For e.g. check that the + output directory doesn't already exist. +
    2. + Provide the {@link RecordWriter} implementation to be used to write out + the output files of the job. Output files are stored in a + {@link FileSystem}. +
    3. +
    + + @see RecordWriter]]> +
    +
+ + + + + + + + + + + Typically a hash function on all or a subset of the key.

+ + @param key the key to be partitioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + + @see Reducer]]> +
    +
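For example, the usual hash-based scheme described above can be written as a Partitioner in a few lines; this mirrors the stock HashPartitioner idea and is only a sketch, not a class from this distribution.

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Partitioner;

    public class HashKeyPartitioner extends Partitioner<Text, Text> {
      @Override
      public int getPartition(Text key, Text value, int numPartitions) {
        // Mask the sign bit so the modulo result is never negative.
        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
      }
    }

A job would select it with job.setPartitionerClass(HashKeyPartitioner.class).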
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @param ]]> + + + + + + + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param context the context of the task + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the input keys + @param the class of the input values + @param the class of the output keys + @param the class of the output values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Reducer implementations + can access the {@link Configuration} for the job via the + {@link JobContext#getConfiguration()} method.

    + +

    Reducer has 3 primary phases:

    +
      +
    1. + +

      Shuffle

      + +

      The Reducer copies the sorted output from each + {@link Mapper} using HTTP across the network.

      +
    2. + +
    3. +

      Sort

      + +

      The framework merge sorts Reducer inputs by + keys + (since different Mappers may have output the same key).

      + +

      The shuffle and sort phases occur simultaneously i.e. while outputs are + being fetched they are merged.

      + +
      SecondarySort
      + +

  To achieve a secondary sort on the values returned by the value + iterator, the application should extend the key with the secondary + key and define a grouping comparator. The keys will be sorted using the + entire key, but will be grouped using the grouping comparator to decide + which keys and values are sent in the same call to reduce. The grouping + comparator is specified via + {@link Job#setGroupingComparatorClass(Class)}. The sort order is + controlled by + {@link Job#setSortComparatorClass(Class)}. A configuration sketch for this + pattern follows the example below.

      + + + For example, say that you want to find duplicate web pages and tag them + all with the url of the "best" known example. You would set up the job + like: +
        +
      • Map Input Key: url
      • +
      • Map Input Value: document
      • +
      • Map Output Key: document checksum, url pagerank
      • +
      • Map Output Value: url
      • +
      • Partitioner: by checksum
      • +
      • OutputKeyComparator: by checksum and then decreasing pagerank
      • +
      • OutputValueGroupingComparator: by checksum
      • +
      +
    4. + +
    5. +

      Reduce

      + +

      In this phase the + {@link #reduce(Object, Iterable, Context)} + method is called for each <key, (collection of values)> in + the sorted inputs.

      +

      The output of the reduce task is typically written to a + {@link RecordWriter} via + {@link Context#write(Object, Object)}.

      +
    6. +
    + +

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

+ public class IntSumReducer<Key> extends Reducer<Key, IntWritable, Key, IntWritable> {
    +   private IntWritable result = new IntWritable();
    + 
+   public void reduce(Key key, Iterable<IntWritable> values, 
    +                      Context context) throws IOException {
    +     int sum = 0;
    +     for (IntWritable val : values) {
    +       sum += val.get();
    +     }
    +     result.set(sum);
+     context.write(key, result);
    +   }
    + }
    + 

    + + @see Mapper + @see Partitioner]]> +
    +
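A job-configuration sketch for the secondary-sort recipe above; the three partitioner/comparator class names are hypothetical placeholders for the checksum/pagerank scheme and are not defined in this distribution.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;

    // Fragment only: ChecksumPartitioner, ChecksumThenPagerankComparator and
    // ChecksumGroupingComparator are assumed user-supplied classes.
    Job job = new Job(new Configuration(), "dedup-pages");
    job.setPartitionerClass(ChecksumPartitioner.class);               // partition by checksum
    job.setSortComparatorClass(ChecksumThenPagerankComparator.class); // checksum, then decreasing pagerank
    job.setGroupingComparatorClass(ChecksumGroupingComparator.class); // group values by checksum only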
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the input key type for the task + @param the input value type for the task + @param the output key type for the task + @param the output value type for the task]]> + + + + + + + + + + + + + + + + + + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param context the job context + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobContext)}. + Subclasses of FileInputFormat can also override the + {@link #isSplitable(JobContext, Path)} method to ensure input-files are + not split-up and are processed as a whole by {@link Mapper}s.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the map's input key type + @param the map's input value type + @param the map's output key type + @param the map's output value type + @param job the job + @return the mapper class to run]]> + + + + + + + the map input key type + @param the map input value type + @param the map output key type + @param the map output value type + @param job the job to modify + @param cls the class to use as the mapper]]> + + + + + + + + + + + + + It can be used instead of the default implementation, + @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + bound in order to improve throughput. +

+ Mapper implementations used with this MultithreadedMapper must be thread-safe. +

+ The Map-Reduce job has to be configured with the mapper to use via + {@link #setMapperClass(Configuration, Class)} and + the number of threads the thread-pool can use with the + {@link #getNumberOfThreads(Configuration)} method. The default + value is 10 threads. +
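A configuration sketch using these helpers; WordCountMapper stands in for any thread-safe Mapper and is not defined here, and the thread count of 8 is arbitrary.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper;

    // Fragment only: the job runs MultithreadedMapper, which fans records out
    // to instances of the real map implementation on its own thread pool.
    Job job = new Job(new Configuration(), "multithreaded-example");
    job.setMapperClass(MultithreadedMapper.class);
    MultithreadedMapper.setMapperClass(job, WordCountMapper.class);
    MultithreadedMapper.setNumberOfThreads(job, 8);   // default is 10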

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapred.output.dir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapred.output.dir}/_temporary/_${taskid} (only) + are promoted to ${mapred.output.dir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

    The application-writer can take advantage of this by creating any + side-files required in a work directory during execution + of his task i.e. via + {@link #getWorkOutputPath(TaskInputOutputContext)}, and + the framework will move them out similarly - thus she doesn't have to pick + unique paths per task-attempt.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
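Inside a task this might be used roughly as follows; a sketch with the new API, where the file name and payload are arbitrary.

    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    // Fragment only: assumes a Mapper/Reducer Context named "context" in scope.
    Path workDir = FileOutputFormat.getWorkOutputPath(context);
    FileSystem fs = workDir.getFileSystem(context.getConfiguration());
    FSDataOutputStream side = fs.create(new Path(workDir, "side-data.txt"));
    side.writeUTF("side-effect output promoted on task commit");
    side.close();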
    + + + + + + + + + The path can be used to create custom files from within the map and + reduce tasks. The path name will be unique for each task. The path parent + will be the job output directory.

    ls + +

    This method uses the {@link #getUniqueFile} method to make the file name + unique for the task.

+ + @param context the context for the task. + @param name the name for the file. + @param extension the extension for the file + @return a unique path across all tasks of the job.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This tool supports archiving and anaylzing (sort/grep) of log-files. + It takes as input + a) Input uri which will serve uris of the logs to be archived. + b) Output directory (not mandatory). + b) Directory on dfs to archive the logs. + c) The sort/grep patterns for analyzing the files and separator for boundaries. + Usage: + Logalyzer -archive -archiveDir -analysis -logs -grep -sort -separator +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/aarch64/share/hadoop/common/jdiff/hadoop_0.20.2.xml b/aarch64/share/hadoop/common/jdiff/hadoop_0.20.2.xml new file mode 100644 index 0000000..36a2a4b --- /dev/null +++ b/aarch64/share/hadoop/common/jdiff/hadoop_0.20.2.xml @@ -0,0 +1,53959 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + final. + + @param in InputStream to deserialize the object from.]]> + + + + + + + + + + + name property, null if + no such property exists. + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + name property, without doing + variable expansion. + + @param name the property name. + @return the value of the name property, + or null if no such property exists.]]> + + + + + + + value of the name property. + + @param name property name. + @param value property value.]]> + + + + + + + + + + + + + + name property. If no such property + exists, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, or if the specified value is not a valid + int, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as an int, + or defaultValue.]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property is specified, or if the specified value is not a valid + long, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property is specified, or if the specified value is not a valid + float, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a float, + or defaultValue.]]> + + + + + + + name property to a float. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + + + + + + + + name property as + a collection of Strings. 
+ If no such property is specified then empty collection is returned. +

    + This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the class name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the class name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

      +
    1. core-default.xml + : Read-only defaults for hadoop.
    2. +
    3. core-site.xml: Site-specific configuration for a given hadoop + installation.
    4. +
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    +  <property>
    +    <name>dfs.client.buffer.dir</name>
    +    <value>/tmp/hadoop/dfs/client</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + core-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
    1. Other properties defined in this Configuration; and, if a name is + undefined here,
    2. +
    3. Properties in {@link System#getProperties()}.
    4. +
    + +

    For example, if a configuration resource contains the following property + definitions: +

    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    + + When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name.]]> +
    +
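+ A minimal sketch of the expansion above (assuming the basedir/tempdir properties shown are loaded from some resource on the classpath):
+
+     Configuration conf = new Configuration();
+     conf.addResource("my-site.xml");   // assumed to define the basedir and tempdir properties above
+     String tmp = conf.get("tempdir");
+     // With the System property user.name set to "alice", tmp is "/user/alice/tmp":
+     // ${basedir} expands to the basedir property, ${user.name} to the System property.
+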
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DistributedCache is a facility provided by the Map-Reduce + framework to cache files (text, archives, jars etc.) needed by applications. +

    + +

Applications specify the files to be cached via urls (hdfs:// or http://) + using the {@link org.apache.hadoop.mapred.JobConf}. + The DistributedCache assumes that the + files specified via hdfs:// urls are already present on the + {@link FileSystem} at the path specified by the url.

    + +

    The framework will copy the necessary files on to the slave node before + any tasks for the job are executed on that node. Its efficiency stems from + the fact that the files are only copied once per job and the ability to + cache archives which are un-archived on the slaves.

    + +

    DistributedCache can be used to distribute simple, read-only + data/text files and/or more complex types such as archives, jars etc. + Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. + Jars may be optionally added to the classpath of the tasks, a rudimentary + software distribution mechanism. Files have execution permissions. + Optionally users can also direct it to symlink the distributed cache file(s) + into the working directory of the task.

    + +

    DistributedCache tracks modification timestamps of the cache + files. Clearly the cache files should not be modified by the application + or externally while the job is executing.

    + +

    Here is an illustrative example on how to use the + DistributedCache:

    +

    +     // Setting up the cache for the application
    +     
    +     1. Copy the requisite files to the FileSystem:
    +     
    +     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
    +     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
    +     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
    +     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
    +     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
    +     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
    +     
    +     2. Setup the application's JobConf:
    +     
    +     JobConf job = new JobConf();
    +     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
    +                                   job);
+     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
    +     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
+     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
    +     
    +     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
    +     or {@link org.apache.hadoop.mapred.Reducer}:
    +     
    +     public static class MapClass extends MapReduceBase  
    +     implements Mapper<K, V, K, V> {
    +     
    +       private Path[] localArchives;
    +       private Path[] localFiles;
    +       
    +       public void configure(JobConf job) {
    +         // Get the cached archives/files
    +         localArchives = DistributedCache.getLocalCacheArchives(job);
    +         localFiles = DistributedCache.getLocalCacheFiles(job);
    +       }
    +       
    +       public void map(K key, V value, 
    +                       OutputCollector<K, V> output, Reporter reporter) 
    +       throws IOException {
    +         // Use data from the cached archives/files here
    +         // ...
    +         // ...
    +         output.collect(k, v);
    +       }
    +     }
    +     
    + 

    + + @see org.apache.hadoop.mapred.JobConf + @see org.apache.hadoop.mapred.JobClient]]> +
    +
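+ The optional symlinking mentioned above can be sketched as follows; the URI fragment ("#lookup.dat") names the link created in the task's working directory:
+
+     JobConf job = new JobConf();
+     DistributedCache.createSymlink(job);
+     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), job);
+     // Tasks can then open the file simply as "lookup.dat" in their working directory.
+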
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BufferedFSInputStream + with the specified buffer size, + and saves its argument, the input stream + in, for later use. An internal + buffer array of length size + is created and stored in buf. + + @param in the underlying input stream. + @param size the buffer size. + @exception IllegalArgumentException if size <= 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fs.scheme.class whose value names the FileSystem class. + The entire URI is passed to the FileSystem instance's initialize method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    +
    +
    +

    +

    ? +
    Matches any single character. + +

    +

    * +
    Matches zero or more characters. + +

    +

    [abc] +
    Matches a single character from character set + {a,b,c}. + +

    +

    [a-b] +
    Matches a single character from the character range + {a...b}. Note that character a must be + lexicographically less than or equal to character b. + +

    +

    [^a] +
    Matches a single character that is not from character set or range + {a}. Note that the ^ character must occur + immediately to the right of the opening bracket. + +

    +

    \c +
    Removes (escapes) any special meaning of character c. + +

    +

    {ab,cd} +
    Matches a string from the string set {ab, cd} + +

    +

    {ab,c{de,fh}} +
    Matches a string from the string set {ab, cde, cfh} + +
    +
    +
+ + @param pathPattern a regular expression specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException]]> +
    +
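+ A minimal sketch of matching such a pattern via FileSystem#globStatus (the /logs path and the pattern are assumptions used only for illustration):
+
+     Configuration conf = new Configuration();
+     FileSystem fs = FileSystem.get(conf);
+     FileStatus[] matches = fs.globStatus(new Path("/logs/2014-*/part-[0-9]*"));
+     if (matches != null) {                     // null when a non-glob path does not exist
+       for (FileStatus status : matches) {
+         System.out.println(status.getPath());
+       }
+     }
+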
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object. The + Hadoop DFS is a multi-machine system that appears as a single + disk. It's useful because of its fault tolerance and potentially + very large capacity. + +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + buf at offset + and checksum into checksum. + The method is used for implementing read, therefore, it should be optimized + for sequential reading + @param pos chunkPos + @param buf desitination buffer + @param offset offset in buf at which to store data + @param len maximun number of bytes to read + @return number of bytes read]]> + + + + + + + + + + + + + + + + + -1 if the end of the + stream is reached. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + This method implements the general contract of the corresponding + {@link InputStream#read(byte[], int, int) read} method of + the {@link InputStream} class. As an additional + convenience, it attempts to read as many bytes as possible by repeatedly + invoking the read method of the underlying stream. This + iterated read continues until one of the following + conditions becomes true:

      + +
    • The specified number of bytes have been read, + +
    • The read method of the underlying stream returns + -1, indicating end-of-file. + +
    If the first read on the underlying stream returns + -1 to indicate end-of-file then this method returns + -1. Otherwise this method returns the number of bytes + actually read. + + @param b destination buffer. + @param off offset at which to start storing bytes. + @param len maximum number of bytes to read. + @return the number of bytes read, or -1 if the end of + the stream has been reached. + @exception IOException if an I/O error occurs. + ChecksumException if any checksum error occurs]]> +
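+ A minimal sketch of consuming a checksummed stream under this contract (the path is an assumption; per the contract above, a short read implies end-of-file was reached):
+
+     FileSystem fs = FileSystem.get(new Configuration());
+     FSDataInputStream in = fs.open(new Path("/data/part-00000"));
+     byte[] buf = new byte[4096];
+     int n;
+     while ((n = in.read(buf, 0, buf.length)) != -1) {
+       // process buf[0..n)
+     }
+     in.close();
+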
    + + + + + + + + + + + + + + + + + + n bytes of data from the + input stream. + +

    This method may skip more bytes than are remaining in the backing + file. This produces no exception and the number of bytes skipped + may include some number of bytes that were beyond the EOF of the + backing file. Attempting to read from the stream after skipping past + the end will result in -1 indicating the end of the file. + +

    If n is negative, no bytes are skipped. + + @param n the number of bytes to be skipped. + @return the actual number of bytes skipped. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to skip to is corrupted]]> + + + + + + + This method may seek past the end of the file. + This produces no exception and an attempt to read from + the stream will result in -1 indicating the end of the file. + + @param pos the postion to seek to. + @exception IOException if an I/O error occurs. + ChecksumException if the chunk to seek to is corrupted]]> + + + + + + + + + + len bytes from + stm + + @param stm an input stream + @param buf destiniation buffer + @param offset offset at which to store data + @param len number of bytes to read + @return actual number of bytes read + @throws IOException if there is any IO error]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + len bytes from the specified byte array + starting at offset off and generate a checksum for + each data chunk. + +

    This method stores bytes from the given array into this + stream's buffer before it gets checksumed. The buffer gets checksumed + and flushed to the underlying output stream when all data + in a checksum chunk are in the buffer. If the buffer is empty and + requested length is at least as large as the size of next checksum chunk + size, this method will checksum and write the chunk directly + to the underlying output stream. Thus it avoids uneccessary data copy. + + @param b the data. + @param off the start offset in the data. + @param len the number of bytes to write. + @exception IOException if an I/O error occurs.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trash feature. Files are moved to a user's trash + directory, a subdirectory of their home directory named ".Trash". Files are + initially moved to a current sub-directory of the trash directory. + Within that sub-directory their original path is preserved. Periodically + one may checkpoint the current trash and remove older checkpoints. (This + design permits trash management without enumeration of the full trash + content, without date support in the filesystem, and without clock + synchronization.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a tool for migrating data from an older to a newer version + of an S3 filesystem. +

    +

    + All files in the filesystem are migrated by re-writing the block metadata + - no datafiles are touched. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + Extracts AWS credentials from the filesystem URI or configuration. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A block-based {@link FileSystem} backed by + Amazon S3. +

    + @see NativeS3FileSystem]]> +
    +
    + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If f is a file, this method will make a single call to S3. + If f is a directory, this method will make a maximum of + (n / 1000) + 2 calls to S3, where n is the total number of + files and directories contained directly in f. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} for reading and writing files stored on + Amazon S3. + Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation + stores files on S3 in their + native form so they can be read by other S3 tools. +

    + @see org.apache.hadoop.fs.s3.S3FileSystem]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . + @param name The name of the server + @param port The port to use on the server + @param findPort whether the server should start at the given port and + increment by 1 until it finds a free port. + @param conf Configuration]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + points to the log directory + "/static/" -> points to common static files (src/webapps/static) + "/" -> the jsp server code from (src/webapps/)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nth value.]]> + + + + + + + + + + + + + + + + + + + + + nth value in the file.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataInputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + DataInputBuffer buffer = new DataInputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using DataInput methods ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new DataOutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + DataOutputBuffer buffer = new DataOutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using DataOutput methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

+ Compared with ObjectWritable, this class is much more efficient, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

+ GenericWritable implements the {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing the {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines + the classes that will be wrapped in GenericObject in the application. + Attention: the classes defined in the getTypes() method must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new InputStream and + ByteArrayInputStream each time data is read. + +

    Typical usage is something like the following:

    +
    + InputBuffer buffer = new InputBuffer();
    + while (... loop condition ...) {
    +   byte[] data = ... get data ...;
    +   int dataLength = ... get data length ...;
    +   buffer.reset(data, dataLength);
    +   ... read buffer using InputStream methods ...
    + }
    + 
    + @see DataInputBuffer + @see DataOutput]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link IOException} or + null pointers. Must only be used for cleanup in exception handlers. + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key and + val. Returns true if such a pair exists and false when at + the end of the map]]> + + + + + + + + + + + + + + + + key or if it does not exist, at the first entry + after the named key. + +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof.]]> + + + + + + + + + key does not exist, return + the first entry that falls just before the key. Otherwise, + return the record that sorts just after. + @return - the key that was the closest match or null if eof.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This saves memory over creating a new OutputStream and + ByteArrayOutputStream each time data is written. + +

    Typical usage is something like the following:

    +
    + OutputBuffer buffer = new OutputBuffer();
    + while (... loop condition ...) {
    +   buffer.reset();
    +   ... write buffer using OutputStream methods ...
    +   byte[] data = buffer.getData();
    +   int dataLength = buffer.getLength();
    +   ... write data to its ultimate destination ...
    + }
    + 
    + @see DataOutputBuffer + @see InputBuffer]]> +
    +
    + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param + @see DeserializerComparator]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link Writer}, {@link Reader} and + {@link Sorter} classes for writing, reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
+
1. Writer : Uncompressed records.
2. RecordCompressWriter : Record-compressed files, only compress values.
3. BlockCompressWriter : Block-compressed files, both keys & values are collected in 'blocks' separately and compressed. The size of the 'block' is configurable.
+ +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

The recommended way is to use the static createWriter methods + provided by the SequenceFile to choose the preferred format.

    + +

    The {@link Reader} acts as the bridge and can read any of the above + SequenceFile formats.

    + +
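+ A minimal sketch of writing and then reading a SequenceFile with the classic createWriter/Reader API (the path and the Text/IntWritable key and value classes are illustrative assumptions):
+
+     Configuration conf = new Configuration();
+     FileSystem fs = FileSystem.get(conf);
+     Path file = new Path("/tmp/example.seq");
+
+     SequenceFile.Writer writer =
+         SequenceFile.createWriter(fs, conf, file, Text.class, IntWritable.class);
+     writer.append(new Text("answer"), new IntWritable(42));
+     writer.close();
+
+     SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
+     Text key = new Text();
+     IntWritable value = new IntWritable();
+     while (reader.next(key, value)) {
+       System.out.println(key + " = " + value);
+     }
+     reader.close();
+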

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
+
• version - 3 bytes of magic header SEQ, followed by 1 byte of actual version number (e.g. SEQ4 or SEQ6)
• keyClassName - key class
• valueClassName - value class
• compression - A boolean which specifies if compression is turned on for keys/values in this file.
• blockCompression - A boolean which specifies if block-compression is turned on for keys/values in this file.
• compression codec - CompressionCodec class which is used for compression of keys and/or values (if compression is enabled).
• metadata - {@link Metadata} for this file.
• sync - A sync marker to denote end of the header.
+
    Uncompressed SequenceFile Format
+
• Header
• Record
  • Record length
  • Key length
  • Key
  • Value
• A sync-marker every few 100 bytes or so.
+
    Record-Compressed SequenceFile Format
+
• Header
• Record
  • Record length
  • Key length
  • Key
  • Compressed Value
• A sync-marker every few 100 bytes or so.
+
    Block-Compressed SequenceFile Format
+
• Header
• Record Block
  • Compressed key-lengths block-size
  • Compressed key-lengths block
  • Compressed keys block-size
  • Compressed keys block
  • Compressed value-lengths block-size
  • Compressed value-lengths block
  • Compressed values block-size
  • Compressed values block
• A sync-marker every few 100 bytes or so.
+

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key, skipping its + value. True if another entry exists, and false at end of file.]]> + + + + + + + + key and + val. Returns true if such a pair exists and false when at + end of file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The position passed must be a position returned by {@link + SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + position, use {@link SequenceFile.Reader#sync(long)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SegmentDescriptor + @param segments the list of SegmentDescriptors + @param tmpDir the directory to write temporary files into + @return RawKeyValueIterator + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For best performance, applications should make sure that the {@link + Writable#readFields(DataInput)} implementation of their keys is + very efficient. In particular, it should avoid allocating memory.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This always returns a synchronized position. In other words, + immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + the key may be earlier in the file than key last written when this + method was called (e.g., with block-compression, it may be the first key + in the block that was being written when this method was called).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key. Returns + true if such a key exists and false when at the end of the set.]]> + + + + + + + key. + Returns key, or null if no match exists.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instatiation + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. 
+ @return byte position of the first occurence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a UTF8 with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + Also includes utilities for efficiently reading and writing UTF-8. + + @deprecated replaced by Text]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

+ + @param in DataInput to deserialize this object from. + @throws IOException]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +

    +     public class MyWritable implements Writable {
    +       // Some data     
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 

    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Example:

    +

    +     public class MyWritableComparable implements WritableComparable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable w) {
+         int thisValue = this.counter;
+         int thatValue = w.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +     }
    + 

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + The default implementation uses the natural ordering, calling {@link + Comparable#compareTo(Object)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implemenation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. 
+ @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FIXME: This array should be in a private or package private location, + since it could be modified by malicious code. +

    ]]> +
    + + + + This interface is public for historical purposes. You should have no need to + use it. +

    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Although BZip2 headers are marked with the magic "Bz" this + constructor expects the next byte in the stream to be the first one after + the magic. Thus callers have to skip the first two bytes. Otherwise this + constructor will throw an exception. +

    + + @throws IOException + if the stream content is malformed or an I/O error occurs. + @throws NullPointerException + if in == null]]> +
    +
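+ A minimal sketch of honoring that requirement (the file name is an assumption):
+
+     InputStream raw = new BufferedInputStream(new FileInputStream("data.bz2"));
+     if (raw.read() != 'B' || raw.read() != 'Z') {
+       throw new IOException("not a BZip2 stream");
+     }
+     CBZip2InputStream in = new CBZip2InputStream(raw);   // stream is now positioned just past the magic
+     // ... read decompressed data from in ...
+     in.close();
+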
    + + + + + + + + + + + + + + + The decompression requires large amounts of memory. Thus you should call the + {@link #close() close()} method as soon as possible, to force + CBZip2InputStream to release the allocated memory. See + {@link CBZip2OutputStream CBZip2OutputStream} for information about memory + usage. +

    + +

+ CBZip2InputStream reads bytes from the compressed source stream via + the single byte {@link java.io.InputStream#read() read()} method exclusively. + Thus you should consider using a buffered source stream. +

    + +

    + Instances of this class are not threadsafe. +

    ]]> +
    +
    + + + + + + + + CBZip2OutputStream with a blocksize of 900k. + +

+ Attention: The caller is responsible for writing the two BZip2 magic + bytes "BZ" to the specified stream prior to calling this + constructor. +

    + + @param out * + the destination stream. + + @throws IOException + if an I/O error occurs in the specified stream. + @throws NullPointerException + if out == null.]]> +
    +
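+ A minimal sketch of the required sequence (the file name is an assumption):
+
+     OutputStream raw = new BufferedOutputStream(new FileOutputStream("data.bz2"));
+     raw.write('B');                                        // the caller writes the magic bytes ...
+     raw.write('Z');                                        // ... before constructing the stream
+     CBZip2OutputStream out = new CBZip2OutputStream(raw);  // uses the default 900k blocksize
+     // ... write uncompressed data to out ...
+     out.close();
+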
    + + + + CBZip2OutputStream with specified blocksize. + +

+ Attention: The caller is responsible for writing the two BZip2 magic + bytes "BZ" to the specified stream prior to calling this + constructor. +

    + + + @param out + the destination stream. + @param blockSize + the blockSize as 100k units. + + @throws IOException + if an I/O error occurs in the specified stream. + @throws IllegalArgumentException + if (blockSize < 1) || (blockSize > 9). + @throws NullPointerException + if out == null. + + @see #MIN_BLOCKSIZE + @see #MAX_BLOCKSIZE]]> +
    +
    + + + + + + + + + + + + + inputLength this method returns MAX_BLOCKSIZE + always. + + @param inputLength + The length of the data which will be compressed by + CBZip2OutputStream.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + == 1.]]> + + + + + == 9.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If you are ever unlucky/improbable enough to get a stack overflow whilst + sorting, increase the following constant and try again. In practice I + have never seen the stack go above 27 elems, so the following limit seems + very generous. +

    ]]> +
    +
    + + + The compression requires large amounts of memory. Thus you should call the + {@link #close() close()} method as soon as possible, to force + CBZip2OutputStream to release the allocated memory. +

    + +

    + You can shrink the amount of allocated memory and maybe raise the compression + speed by choosing a lower blocksize, which in turn may cause a lower + compression ratio. You can avoid unnecessary memory allocation by avoiding + using a blocksize which is bigger than the size of the input. +

    + +

    + You can compute the memory usage for compressing by the following formula: +

    + +
    + <code>400k + (9 * blocksize)</code>.
    + 
    + +

    + To get the memory required for decompression by {@link CBZip2InputStream + CBZip2InputStream} use +

    + +
    + <code>65k + (5 * blocksize)</code>.
    + 
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Memory usage by blocksize
Blocksize    Compression memory usage    Decompression memory usage
100k         1300k                       565k
200k         2200k                       1065k
300k         3100k                       1565k
400k         4000k                       2065k
500k         4900k                       2565k
600k         5800k                       3065k
700k         6700k                       3565k
800k         7600k                       4065k
900k         8500k                       4565k

    + For decompression CBZip2InputStream allocates less memory if the + bzipped input is smaller than one block. +

    + +

    + Instances of this class are not threadsafe. +

    + +

    + TODO: Update to BZip2 1.0.1 +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return the total (non-negative) number of uncompressed bytes input so far]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-zlib is loaded & initialized + and can be loaded for this job, else false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • "none" - No compression. +
  • "lzo" - LZO compression. +
  • "gz" - GZIP compression. + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Block Compression. +
  • Named meta data blocks. +
  • Sorted or unsorted keys. +
  • Seek by key or by file offset. + + The memory footprint of a TFile includes the following: +
      +
    • Some constant overhead of reading or writing a compressed block. +
        +
      • Each compressed block requires one compression/decompression codec for + I/O. +
      • Temporary space to buffer the key. +
      • Temporary space to buffer the value (for TFile.Writer only). Values are + chunk encoded, so that we buffer at most one chunk of user data. By default, + the chunk buffer is 1MB. Reading chunked value does not require additional + memory. +
      +
    • TFile index, which is proportional to the total number of Data Blocks. + The total amount of memory needed to hold the index can be estimated as + (56+AvgKeySize)*NumBlocks. +
    • MetaBlock index, which is proportional to the total number of Meta + Blocks.The total amount of memory needed to hold the index for Meta Blocks + can be estimated as (40+AvgMetaBlockName)*NumMetaBlock. +
    +

    + The behavior of TFile can be customized by the following variables through + Configuration: +

      +
• tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default + to 1MB. Values shorter than the chunk size are guaranteed to have a + known value length at read time (See + {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}). +
    • tfile.fs.output.buffer.size: Buffer size used for + FSDataOutputStream. Integer (in bytes). Default to 256KB. +
    • tfile.fs.input.buffer.size: Buffer size used for + FSDataInputStream. Integer (in bytes). Default to 256KB. +
    +
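+ A minimal sketch of setting these knobs (the values shown are simply the documented defaults):
+
+     Configuration conf = new Configuration();
+     conf.setInt("tfile.io.chunk.size", 1024 * 1024);         // 1MB value chunks
+     conf.setInt("tfile.fs.output.buffer.size", 256 * 1024);  // FSDataOutputStream buffer
+     conf.setInt("tfile.fs.input.buffer.size", 256 * 1024);   // FSDataInputStream buffer
+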

    + Suggestions on performance optimization. +

      +
    • Minimum block size. We recommend a minimum block size between 256KB and 1MB for general usage. A larger block size is preferred if files are primarily for sequential access; however, it leads to inefficient random access (because there is more data to decompress). Smaller blocks are good for random access, but require more memory to hold the block index, and may be slower to create (because we must flush the compressor stream at the conclusion of each data block, which leads to an FS I/O flush). Further, due to the internal caching in the compression codec, the smallest practical block size is around 20KB-30KB. +
    • The current implementation does not offer true multi-threading for reading. The implementation uses FSDataInputStream seek()+read(), which is shown to be much faster than positioned reads in single-threaded mode. However, it also means that if multiple threads attempt to access the same TFile (using multiple scanners) simultaneously, the actual I/O is carried out sequentially even if they access different DFS blocks. +
    • Compression codec. Use "none" if the data is not very compressible (by compressible, we mean a compression ratio of at least 2:1). Generally, use "lzo" as the starting point for experimenting. "gz" offers a slightly better compression ratio than "lzo" but requires roughly 4x the CPU to compress and 2x the CPU to decompress, compared to "lzo". +
    • File system buffering. If the underlying FSDataInputStream and FSDataOutputStream are already adequately buffered, or if applications read/write keys and values in large buffers, the input/output buffering in the TFile layer can be reduced by setting the configuration parameters + "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size". +
    + + Some design rationale behind TFile can be found at Hadoop-3315.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + entry of the TFile. + @param endKey + End key of the scan. If null, scan up to the last entry + of the TFile. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Use {@link Scanner#atEnd()} to test whether the cursor is at the end + location of the scanner. +

    + Use {@link Scanner#advance()} to move the cursor to the next key-value + pair (or end if none exists). Use seekTo methods ( + {@link Scanner#seekTo(byte[])} or + {@link Scanner#seekTo(byte[], int, int)}) to seek to any arbitrary + location in the covered range (including backward seeking). Use + {@link Scanner#rewind()} to seek back to the beginning of the scanner. + Use {@link Scanner#seekToEnd()} to seek to the end of the scanner. +

    + Actual keys and values may be obtained through {@link Scanner.Entry} + object, which is obtained through {@link Scanner#entry()}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
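To make the cursor workflow above concrete, here is a rough end-to-end sketch of writing a small TFile and scanning it back. It is not taken from this patch: the TFile.Writer/TFile.Reader constructor shapes and the Entry#get(BytesWritable, BytesWritable) call are assumptions based on the 2.2.0 org.apache.hadoop.io.file.tfile API, so treat it as a sketch rather than authoritative usage.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.io.file.tfile.TFile;

    public class TFileSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        Path path = new Path("/tmp/example.tfile");   // hypothetical location

        // Write a few key/value pairs, sorted by raw bytes ("memcmp"), gzip-compressed.
        FSDataOutputStream out = fs.create(path);
        TFile.Writer writer = new TFile.Writer(out, 256 * 1024, "gz", "memcmp", conf);
        writer.append("k1".getBytes("UTF-8"), "v1".getBytes("UTF-8"));
        writer.append("k2".getBytes("UTF-8"), "v2".getBytes("UTF-8"));
        writer.close();
        out.close();

        // Scan the whole file with the cursor-style API described above.
        FSDataInputStream in = fs.open(path);
        TFile.Reader reader = new TFile.Reader(in, fs.getFileStatus(path).getLen(), conf);
        TFile.Reader.Scanner scanner = reader.createScanner();
        BytesWritable key = new BytesWritable();
        BytesWritable value = new BytesWritable();
        while (!scanner.atEnd()) {
          scanner.entry().get(key, value);   // assumed Entry#get(BytesWritable, BytesWritable)
          System.out.println(new String(key.copyBytes(), "UTF-8") + " -> "
              + new String(value.copyBytes(), "UTF-8"));
          scanner.advance();
        }
        scanner.close();
        reader.close();
        in.close();
      }
    }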

  • Algorithmic comparator: binary comparators that are language + independent. Currently, only "memcmp" is supported. +
  • Language-specific comparator: binary comparators that can + only be constructed in specific language. For Java, the syntax + is "jclass:", followed by the class name of the RawComparator. + Currently, we only support RawComparators that can be + constructed through the default constructor (with no + parameters). Parameterized RawComparators such as + {@link WritableComparator} or + {@link JavaSerializationComparator} may not be directly used. + One should write a wrapper class that inherits from such classes + and use its default constructor to perform proper + initialization. + + @param conf + The configuration object. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + If an exception is thrown, the TFile will be in an inconsistent + state. The only legitimate call after that would be close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Utils#writeVLong(out, n). + + @param out + output stream + @param n + The integer to be encoded + @throws IOException + @see Utils#writeVLong(DataOutput, long)]]> + + + + + + + + +
  • if n in [-32, 127): encode in one byte with the actual value. + Otherwise, +
  • if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52; + byte[1]=n&0xff. Otherwise, +
  • if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 - + 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise, +
  • if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112; + byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise: +
  • if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] = + (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff; +
  • if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] = + (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff; + byte[4]=(n>>8)&0xff; byte[5]=n&0xff +
  • if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] = + (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff; + byte[4]=(n>>16)&0xff; byte[5]=(n>>8)&0xff; byte[6]=n&0xff; +
  • if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] = + (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff; + byte[4]=(n>>24)&0xff; byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff; + byte[7]=n&0xff; +
  • if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] = + (n>>56)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff; + byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; + byte[7]=(n>>8)&0xff; byte[8]=n&0xff; + + + @param out + output stream + @param n + the integer number + @throws IOException]]> + + + + + + + (int)Utils#readVLong(in). + + @param in + input stream + @return the decoded integer + @throws IOException + + @see Utils#readVLong(DataInput)]]> + + + + + + + +
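Before the decoding rules below, a small round-trip sketch of this variable-length format, assuming the static Utils#writeVLong(DataOutput, long) and Utils#readVLong(DataInput) methods referenced above live in org.apache.hadoop.io.file.tfile.Utils:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import org.apache.hadoop.io.file.tfile.Utils;

    public class VLongRoundTrip {
      public static void main(String[] args) throws Exception {
        long[] samples = { 0, 100, -32, 127, 5000, -1L << 40, Long.MAX_VALUE };
        for (long n : samples) {
          ByteArrayOutputStream bytes = new ByteArrayOutputStream();
          Utils.writeVLong(new DataOutputStream(bytes), n);   // 1 to 9 bytes, per the rules above
          long decoded = Utils.readVLong(
              new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
          System.out.println(n + " -> " + bytes.size() + " byte(s), decoded back to " + decoded);
        }
      }
    }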
  • if (FB >= -32), return (long)FB; +
  • if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff; +
  • if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 + + NB[1]&0xff; +
  • if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 + + (NB[1]&0xff)<<8 + NB[2]&0xff; +
  • if (FB in [-128, -121]), return interpret NB[FB+129] as a signed + big-endian integer. + + @param in + input stream + @return the decoded long integer. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Keep trying a limited number of times, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
    + + + + + + + Keep trying for a maximum time, waiting a fixed time between attempts, + and then fail by re-throwing the exception. +

    ]]> +
    +
    + + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime multiplied by the number of tries so far. +

    ]]> +
    +
    + + + + + + + Keep trying a limited number of times, waiting a growing amount of time between attempts, + and then fail by re-throwing the exception. + The time between attempts is sleepTime multiplied by a random + number in the range [0, 2^(number of retries)). +

    ]]> +
    +
    + + + + + + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + + + A retry policy for RemoteException + Set a default policy with some explicit handlers for specific exceptions. +

    ]]> +
    +
    + + + + Try once, and fail by re-throwing the exception. + This corresponds to having no retry mechanism in place. +

    ]]> +
    +
    + + + + Try once, and fail silently for void methods, or by + re-throwing the exception for non-void methods. +

    ]]> +
    +
    + + + + Keep trying forever. +

    ]]> +
    +
    + + + A collection of useful implementations of {@link RetryPolicy}. +

    ]]> +
    +
    + + + + + + + + + + Determines whether the framework should retry a + method for the given exception, and the number + of retries that have been made for that operation + so far. +

    + @param e The exception that caused the method to fail. + @param retries The number of times the method has been retried. + @return true if the method should be retried, + false if the method should not be retried + but shouldn't fail with an exception (only for void methods). + @throws Exception The re-thrown exception e indicating + that the method failed and should not be retried further.]]> +
    +
    + + + Specifies a policy for retrying method failures. + Implementations of this interface should be immutable. +

    ]]> +
    +
    + + + + + + + + + + + + Create a proxy for an interface of an implementation class + using the same retry policy for each method in the interface. +

    + @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param retryPolicy the policy for retrying method call failures + @return the retry proxy]]> +
    +
    + + + + + + + Create a proxy for an interface of an implementation class + using a set of retry policies specified by method name. + If no retry policy is defined for a method then a default of + {@link RetryPolicies#TRY_ONCE_THEN_FAIL} is used. +

    + @param iface the interface that the retry will implement + @param implementation the instance whose methods should be retried + @param methodNameToPolicyMap a map of method names to retry policies + @return the retry proxy]]> +
    +
    + + + A factory for creating retry proxies. +

    ]]> +
    +
    + +
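To make the policies above concrete, here is a hedged sketch of wrapping an interface with a retry proxy. The FlakyStore interface is invented for illustration, and the RetryPolicies factory method name (retryUpToMaximumCountWithFixedSleep) and the RetryProxy.create signature are written from memory of the 2.2.0 org.apache.hadoop.io.retry API, so verify them against the actual classes.

    import java.io.IOException;
    import java.util.concurrent.TimeUnit;
    import org.apache.hadoop.io.retry.RetryPolicies;
    import org.apache.hadoop.io.retry.RetryPolicy;
    import org.apache.hadoop.io.retry.RetryProxy;

    public class RetrySketch {
      // Hypothetical flaky service interface.
      public interface FlakyStore {
        String fetch(String key) throws IOException;
      }

      public static FlakyStore withRetries(FlakyStore store) {
        // "Keep trying a limited number of times, waiting a fixed time between attempts."
        RetryPolicy policy =
            RetryPolicies.retryUpToMaximumCountWithFixedSleep(5, 2, TimeUnit.SECONDS);
        return (FlakyStore) RetryProxy.create(FlakyStore.class, store, policy);
      }
    }

Every IOException thrown by fetch() is then retried up to five times, two seconds apart, before being re-thrown to the caller.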
    + + + + + + + + Prepare the deserializer for reading.

    ]]> +
    +
    + + + + + + Deserialize the next object from the underlying input stream. + If the object t is non-null then this deserializer + may set its internal state to the next object read from the input + stream. Otherwise, if the object t is null a new + deserialized object will be created. +

    + @return the deserialized object]]> +
    +
    + + + + Close the underlying input stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for deserializing objects of type T from an + {@link InputStream}. +

    + +

    + Deserializers are stateful, but must not buffer the input since + other producers may read from the input between calls to + {@link #deserialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link Deserializer} to deserialize + the objects to be compared so that the standard {@link Comparator} can + be used to compare them. +

    +

    + One may optimize compare-intensive operations by using a custom + implementation of {@link RawComparator} that operates directly + on byte representations. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + Encapsulates a {@link Serializer}/{@link Deserializer} pair. +

    + @param ]]> +
    +
    + + + + + + + Serializations are found by reading the io.serializations + property from conf, which is a comma-delimited list of + classnames. +

    ]]> +
    +
    + + + + + + + + + + + + A factory for {@link Serialization}s. +

    ]]> +
    +
    + + + + + + + + Prepare the serializer for writing.

    ]]> +
    +
    + + + + + Serialize t to the underlying output stream.

    ]]> +
    +
    + + + + Close the underlying output stream and clear up any resources.

    ]]> +
    +
    + + + Provides a facility for serializing objects of type T to an + {@link OutputStream}. +

    + +

    + Serializers are stateful, but must not buffer the output since + other producers may write to the output between calls to + {@link #serialize(Object)}. +

    + @param ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + +
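A small sketch of the Serializer/Deserializer contract described above, using SerializationFactory to round-trip a Writable in memory. It assumes the stock io.serializations list (which includes WritableSerialization by default), so a Text value can be resolved without extra configuration.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.serializer.Deserializer;
    import org.apache.hadoop.io.serializer.SerializationFactory;
    import org.apache.hadoop.io.serializer.Serializer;

    public class SerializationSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        SerializationFactory factory = new SerializationFactory(conf);

        // Serialize a Text value to an in-memory stream.
        Serializer<Text> serializer = factory.getSerializer(Text.class);
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        serializer.open(out);                 // "Prepare the serializer for writing."
        serializer.serialize(new Text("hello"));
        serializer.close();

        // Deserialize it back; passing a non-null object allows reuse.
        Deserializer<Text> deserializer = factory.getDeserializer(Text.class);
        deserializer.open(new ByteArrayInputStream(out.toByteArray()));
        Text copy = deserializer.deserialize(new Text());
        deserializer.close();
        System.out.println(copy);             // prints "hello"
      }
    }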
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + param, to the IPC server running at + address, returning the value. Throws exceptions if there are + network problems or if the remote code threw an exception. + @deprecated Use {@link #call(Writable, InetSocketAddress, Class, UserGroupInformation)} instead]]> + + + + + + + + + + param, to the IPC server running at + address with the ticket credentials, returning + the value. + Throws exceptions if there are network problems or if the remote code + threw an exception. + @deprecated Use {@link #call(Writable, InetSocketAddress, Class, UserGroupInformation)} instead]]> + + + + + + + + + + + param, to the IPC server running at + address which is servicing the protocol protocol, + with the ticket credentials, returning the value. + Throws exceptions if there are network problems or if the remote code + threw an exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unwraps any IOException. + + @param lookupTypes the desired exception class. + @return IOException, which is either the lookupClass exception or this.]]> + + + + + This unwraps any Throwable that has a constructor taking + a String as a parameter. + Otherwise it returns this. + + @return Throwable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + protocol is a Java interface. All parameters and return types must + be one of: + +
    • a primitive type, boolean, byte, + char, short, int, long, + float, double, or void; or
    • + +
    • a {@link String}; or
    • + +
    • a {@link Writable}; or
    • + +
    • an array of the above types
    + + All methods in the protocol should throw only IOException. No field data of + the protocol instance is transmitted.]]> +
    +
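As a sketch of the constraints just listed, a hypothetical RPC protocol interface might look like the following. The interface and its methods are invented for illustration; only the type rules (primitives, String, Writable, or arrays of these) and the IOException-only rule come from the list above. A real Hadoop protocol additionally carries a version (e.g. via VersionedProtocol), which is omitted here.

    import java.io.IOException;
    import org.apache.hadoop.io.Text;

    // Hypothetical protocol: every parameter and return type is a primitive,
    // a String, a Writable, or an array of those; every method throws only IOException.
    public interface InventoryProtocol {
      long getItemCount(String warehouse) throws IOException;             // String in, primitive out
      Text describeItem(long itemId) throws IOException;                  // Writable return type
      long[] listItemIds(String warehouse, int limit) throws IOException; // array of a primitive
    }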
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + handlerCount determines + the number of handler threads that will be used to process calls.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + ,name=RpcActivityForPort" + + Many of the activity metrics are sampled and averaged on an interval + which can be specified in the metrics config file. +

    + For the metrics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most metrics contexts do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-metrics.properties file: +

    +        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        rpc.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically + + + + Impl details: We use a dynamic mbean that gets the list of the metrics + from the metrics registry passed as an argument to the constructor]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #rpcQueueTime}.inc(time)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the statistics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        rpc.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When constructing the instance, if the factory property + contextName.class exists, + its value is taken to be the name of the class to instantiate. Otherwise, + the default is to create an instance of + org.apache.hadoop.metrics.spi.NullContext, which is a + dummy "no-op" context which will cause all metric data to be discarded. + + @param contextName the name of the context + @return the named MetricsContext]]> + + + + + + + + + + + + + + + + + + + + + When the instance is constructed, this method checks if the file + hadoop-metrics.properties exists on the class path. If it + exists, it must be in the format defined by java.util.Properties, and all + the properties in the file are set as attributes on the newly created + ContextFactory instance. + + @return the singleton ContextFactory instance]]> + + + + getFactory() method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + startMonitoring() again after calling + this. + @see #close()]]> + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A record name identifies the kind of data to be reported. For example, a + program reporting statistics relating to the disks on a computer might use + a record name "diskStats".

    + + A record has zero or more tags. A tag has a name and a value. To + continue the example, the "diskStats" record might use a tag named + "diskName" to identify a particular disk. Sometimes it is useful to have + more than one tag, so there might also be a "diskType" with value "ide" or + "scsi" or whatever.

    + + A record also has zero or more metrics. These are the named + values that are to be reported to the metrics system. In the "diskStats" + example, possible metric names would be "diskPercentFull", "diskPercentBusy", + "kbReadPerSecond", etc.

    + + The general procedure for using a MetricsRecord is to fill in its tag and + metric values, and then call update() to pass the record to the + client library. + Metric data is not immediately sent to the metrics system + each time that update() is called. + An internal table is maintained, identified by the record name. This + table has columns + corresponding to the tag and the metric names, and rows + corresponding to each unique set of tag values. An update + either modifies an existing row in the table, or adds a new row with a set of + tag values that are different from all the other rows. Note that if there + are no tags, then there can be at most one row in the table.

    + + Once a row is added to the table, its data will be sent to the metrics system + on every timer period, whether or not it has been updated since the previous + timer period. If this is inappropriate, for example if metrics were being + reported by some transient object in an application, the remove() + method can be used to remove the row and thus stop the data from being + sent.

    + + Note that the update() method is atomic. This means that it is + safe for different threads to be updating the same metric. More precisely, + it is OK for different threads to call update() on MetricsRecord instances + with the same set of tag names and tag values. Different threads should + not use the same MetricsRecord instance at the same time.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MetricsContext.registerUpdater().]]> + + + + + + + + + + + + + + + + + + + + + + + + + fileName attribute, + if specified. Otherwise the data will be written to standard + output.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is configured by setting ContextFactory attributes which in turn + are usually configured through a properties file. All the attributes are + prefixed by the contextName. For example, the properties file might contain: +

    + myContextName.fileName=/tmp/metrics.log
    + myContextName.period=5
    + 
    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + contextName.tableName. The returned map consists of + those attributes with the contextName and tableName stripped off.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + recordName. + Throws an exception if the metrics implementation is configured with a fixed + set of record names and recordName is not in that set. + + @param recordName the name of the record + @throws MetricsException if recordName conflicts with configuration data]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class implements the internal table of metric data, and the timer + on which data is to be sent to the metrics system. Subclasses must + override the abstract emitRecord method in order to transmit + the data.

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + update + and remove().]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + ,name=" + Where the and are the supplied parameters + + @param serviceName + @param nameName + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.rpc.socket.factory.class.<ClassName>. When no + such parameter exists then fall back on the default socket factory as + configured by hadoop.rpc.socket.factory.class.default. If + this default socket factory is not configured, then fall back on the JVM + default socket factory. + + @param conf the configuration + @param clazz the class (usually a {@link VersionedProtocol}) + @return a socket factory]]> + + + + + + hadoop.rpc.socket.factory.default + + @param conf the configuration + @return the default socket factory as specified in the configuration or + the JVM default socket factory if the configuration does not + contain a default socket factory property.]]> + + + + + + + + + + + + + : + ://:/]]> + + + + + + + + : + ://:/]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
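Returning to the metrics API described earlier in this section, a minimal reporting sketch with the old (pre-metrics2) classes might look like this. The MetricsUtil helper and the setTag/setMetric/update method names are assumptions based on the org.apache.hadoop.metrics API of this era rather than something spelled out in this file.

    import org.apache.hadoop.metrics.MetricsContext;
    import org.apache.hadoop.metrics.MetricsRecord;
    import org.apache.hadoop.metrics.MetricsUtil;

    public class DiskStatsReporter {
      public static void report(String diskName, int percentFull) {
        // Look up the context named "disk"; its class and period come from
        // hadoop-metrics.properties, falling back to the no-op NullContext.
        MetricsContext context = MetricsUtil.getContext("disk");
        MetricsRecord record = MetricsUtil.createRecord(context, "diskStats");
        record.setTag("diskName", diskName);           // identifies the row
        record.setMetric("diskPercentFull", percentFull);
        record.update();                               // hand the row to the metrics system
      }
    }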
    + + From documentation for {@link #getInputStream(Socket, long)}:
    + Returns InputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketInputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getInputStream()} is returned. In the latter + case, the timeout argument is ignored and the timeout set with + {@link Socket#setSoTimeout(int)} applies for reads.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see #getInputStream(Socket, long) + + @param socket + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getInputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return InputStream for reading from the socket. + @throws IOException]]> +
    +
    + + + + +
    + + From documentation for {@link #getOutputStream(Socket, long)} :
    + Returns OutputStream for the socket. If the socket has an associated + SocketChannel then it returns a + {@link SocketOutputStream} with the given timeout. If the socket does not + have a channel, {@link Socket#getOutputStream()} is returned. In the latter + case, the timeout argument is ignored and the write will wait until + data is available.

    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see #getOutputStream(Socket, long) + + @param socket + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    + + + + + +
    + + Any socket created using socket factories returned by {@link #NetUtils}, + must use this interface instead of {@link Socket#getOutputStream()}. + + @see Socket#getChannel() + + @param socket + @param timeout timeout in milliseconds. This may not always apply. zero + for waiting as long as necessary. + @return OutputStream for writing to the socket. + @throws IOException]]> +
    +
    + + + + + + + socket.connect(endpoint, timeout). If + socket.getChannel() returns a non-null channel, + connect is implemented using Hadoop's selectors. This is done mainly + to avoid Sun's connect implementation from creating thread-local + selectors, since Hadoop does not have control on when these are closed + and could end up taking all the available file descriptors. + + @see java.net.Socket#connect(java.net.SocketAddress, int) + + @param socket + @param endpoint + @param timeout - timeout in milliseconds]]> + + + + + + + + + + + + + + +
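A brief sketch of the NetUtils helpers documented above; the host and port are placeholders, and the calls shown (connect, getInputStream, getOutputStream) are the ones this section describes.

    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.InetSocketAddress;
    import java.net.Socket;
    import javax.net.SocketFactory;
    import org.apache.hadoop.net.NetUtils;

    public class NetUtilsSketch {
      public static void main(String[] args) throws Exception {
        Socket socket = SocketFactory.getDefault().createSocket();
        // Connect with a 10s timeout via Hadoop's selector-based connect.
        NetUtils.connect(socket, new InetSocketAddress("example.com", 8020), 10000);

        // Always obtain streams through NetUtils, as the javadoc above requires;
        // reads and writes then honor the supplied timeout where a channel exists.
        InputStream in = NetUtils.getInputStream(socket, 10000);
        OutputStream out = NetUtils.getOutputStream(socket, 10000);
        out.flush();
        in.close();
        out.close();
        socket.close();
      }
    }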
    + + + + + + + + + + + + + + + + + + + + + node + + @param node + a node + @return true if node is already in the tree; false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + scope + if scope starts with ~, choose one from the all nodes except for the + ones in scope; otherwise, choose one from scope + @param scope range of nodes from which a node will be choosen + @return the choosen node]]> + + + + + + + scope but not in excludedNodes + if scope starts with ~, return the number of nodes that are not + in scope and excludedNodes; + @param scope a path string that may start with ~ + @param excludedNodes a list of nodes + @return number of available nodes]]> + + + + + + + + + + + + reader + It linearly scans the array, if a local node is found, swap it with + the first element of the array. + If a local rack node is found, swap it with the first element following + the local node. + If neither local node or local rack node is found, put a random replica + location at postion 0. + It leaves the rest nodes untouched.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as an infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout in milliseconds. Must not be negative. + @throws IOException]]> +
    +
    + + + +
    + + Create a new input stream with the given timeout. If the timeout + is zero, it will be treated as infinite timeout. The socket's + channel will be configured to be non-blocking. + @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + + @param socket should have a channel associated with it. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + +
    + + Create a new output stream with the given timeout. If the timeout + is zero, it will be treated as an infinite timeout. The socket's + channel will be configured to be non-blocking. + + @see SocketOutputStream#SocketOutputStream(WritableByteChannel, long) + + @param socket should have a channel associated with it. + @param timeout timeout in milliseconds. Must not be negative. + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = getCount(). + @param newCapacity The new capacity in bytes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index idx = startVector(...); + while (!idx.done()) { + .... // read element of a vector + idx.incr(); + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This task takes the given record definition files and compiles them into + java or c++ + files. It is then up to the user to compile the generated files. + +

    The task requires the file or the nested fileset element to be + specified. Optional attributes are language (set the output + language, default is "java"), + destdir (name of the destination directory for generated java/c++ + code, default is ".") and failonerror (specifies error handling + behavior. default is true). +

    Usage

    +
    + <recordcc
    +       destdir="${basedir}/gensrc"
    +       language="java">
    +   <fileset include="**\/*.jr" />
    + </recordcc>
    + 
    ]]> +
    +
    + +
    + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + Group with the given groupname. + @param group group name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi. + @param ugi user + @return the {@link Subject} for the user identified by ugi]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ugi as a comma separated string in + conf as a property attr + + The String starts with the user name followed by the default group names, + and other group names. + + @param conf configuration + @param attr property name + @param ugi a UnixUserGroupInformation]]> + + + + + + + + conf + + The object is expected to store with the property name attr + as a comma separated string that starts + with the user name followed by group names. + If the property name is not defined, return null. + It's assumed that there is only one UGI per user. If this user already + has a UGI in the ugi map, return the ugi in the map. + Otherwise, construct a UGI from the configuration, store it in the + ugi map and return it. + + @param conf configuration + @param attr property name + @return a UnixUGI + @throws LoginException if the stored string is ill-formatted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + User with the given username. + @param user user name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). 
+ @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + does not provide the stack trace for security purposes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + service as related to + Service Level Authorization for Hadoop. + + Each service defines it's configuration key and also the necessary + {@link Permission} required to access the service.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + in]]> + + + + + + + out.]]> + + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + reset is true, then resets the checksum. + @return number of bytes written. Will be equal to getChecksumSize();]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GenericOptionsParser to parse only the generic Hadoop + arguments. + + The array of string arguments other than the generic arguments can be + obtained by {@link #getRemainingArgs()}. + + @param conf the Configuration to modify. + @param args command-line arguments.]]> + + + + + GenericOptionsParser to parse given options as well + as generic Hadoop options. + + The resulting CommandLine object can be obtained by + {@link #getCommandLine()}. + + @param conf the configuration to modify + @param options options built by the caller + @param args User-specified arguments]]> + + + + + Strings containing the un-parsed arguments + or empty array if commandLine was not defined.]]> + + + + + + + + + + CommandLine object + to process the parsed arguments. + + Note: If the object is created with + {@link #GenericOptionsParser(Configuration, String[])}, then returned + object will only contain parsed generic options. + + @return CommandLine representing list of arguments + parsed against Options descriptor.]]> + + + + + + + + + + + + + + + + + GenericOptionsParser is a utility to parse command line + arguments generic to the Hadoop framework. + + GenericOptionsParser recognizes several standarad command + line arguments, enabling applications to easily specify a namenode, a + jobtracker, additional configuration resources etc. + +

    Generic Options

    + +

    The supported generic options are:

    +

    +     -conf <configuration file>     specify a configuration file
    +     -D <property=value>            use value for given property
    +     -fs <local|namenode:port>      specify a namenode
    +     -jt <local|jobtracker:port>    specify a job tracker
    +     -files <comma separated list of files>    specify comma separated
    +                            files to be copied to the map reduce cluster
    +     -libjars <comma separated list of jars>   specify comma separated
    +                            jar files to include in the classpath.
    +     -archives <comma separated list of archives>    specify comma
    +             separated archives to be unarchived on the compute machines.
    +
    + 

    + +

    The general command line syntax is:

    +

    + bin/hadoop command [genericOptions] [commandOptions]
    + 

    + +

    Generic command line arguments might modify + Configuration objects, given to constructors.

    + +

    The functionality is implemented using Commons CLI.

    + +

    Examples:

    +

    + $ bin/hadoop dfs -fs darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    + 
    + $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
    + list /data directory in dfs with namenode darwin:8020
    +     
    + $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
    + list /data directory in dfs with conf specified in hadoop-site.xml
    +     
    + $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt darwin:50020 -submit job.xml
    + submit a job to job tracker darwin:50020
    +     
    + $ bin/hadoop job -jt local -submit job.xml
    + submit a job to local runner
    + 
    + $ bin/hadoop jar -libjars testlib.jar 
    + -archives test.tgz -files file.txt inputjar args
    + job submission with libjars, files and archives
    + 

    + + @see Tool + @see ToolRunner]]> +
    +
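For completeness, a short sketch of driving GenericOptionsParser directly (most applications go through ToolRunner instead, as shown further below); the property read at the end is only there to show that -fs/-D options landed in the Configuration.

    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.util.GenericOptionsParser;

    public class ParseGenericOptions {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Consumes -conf, -D, -fs, -jt, -files, -libjars and -archives,
        // applies them to conf, and hands back whatever is left over.
        GenericOptionsParser parser = new GenericOptionsParser(conf, args);
        String[] remaining = parser.getRemainingArgs();
        System.out.println("fs.default.name = " + conf.get("fs.default.name"));
        System.out.println("application args: " + Arrays.toString(remaining));
      }
    }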
    + + + + + + + + + Class<T>) of the + argument of type T. + @param The type of the argument + @param t the object to get it class + @return Class<T>]]> + + + + + + + List<T> to a an array of + T[]. + @param c the Class object of the items in the list + @param list the list to convert]]> + + + + + + List<T> to a an array of + T[]. + @param list the list to convert + @throws ArrayIndexOutOfBoundsException if the list is empty. + Use {@link #toArray(Class, List)} if the list may be empty.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.file.buffer.size specified in the given + Configuration. + @param in input stream + @param conf configuration + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if native-hadoop is loaded, + else false]]> + + + + + + true if native hadoop libraries, if present, can be + used for this job; false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { pq.top().change(); pq.adjustTop(); } + instead of
    +  { o = pq.pop(); o.change(); pq.push(o); }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take an insignificant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occured and time-out the operation.

    ]]> +
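As a tiny illustration, a caller-supplied Progressable can be as simple as the following; the counter and logging are invented for the sketch, and the single progress() callback is the whole contract.

    import org.apache.hadoop.util.Progressable;

    public class LoggingProgress implements Progressable {
      private long ticks = 0;

      @Override
      public void progress() {            // invoked to signal that work is still advancing
        if (++ticks % 1000 == 0) {
          System.out.println("still making progress: " + ticks + " ticks");
        }
      }
    }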
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop Pipes + or Hadoop Streaming. + + It also checks to ensure that we are running on a *nix platform else + (e.g. in Cygwin/Windows) it returns null. + @param conf configuration + @return a String[] with the ulimit command arguments or + null if we are running on a non *nix platform or + if the limit is unspecified.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @return the output of the executed command.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell can be used to run unix commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShellCommandExecutorshould be used in cases where the output + of the command needs no explicit parsing and where the command, working + directory and the environment remains unchanged. The output of the command + is stored as-is and is expected to be small.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ArrayList of string values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the char to be escaped + @return an escaped string]]> + + + + + + + + + + + + + + + + + + + + + + charToEscape in the string + with the escape char escapeChar + + @param str string + @param escapeChar escape char + @param charToEscape the escaped char + @return an unescaped string]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +

    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyApp.MyMapper.class);
    +         job.setReducerClass(MyApp.MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
    +         JobClient.runJob(job);
    +         return 0;
    +       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
    +         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 

    + + @see GenericOptionsParser + @see ToolRunner]]> +
    +
    + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method.]]> + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bloom filter, as defined by Bloom in 1970. +

    + The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + the networking research community in the past decade thanks to the bandwidth efficiencies that it + offers for the transmission of set membership information between networked hosts. A sender encodes + the information into a bit vector, the Bloom filter, that is more compact than a conventional + representation. Computation and space costs for construction are linear in the number of elements. + The receiver uses the filter to test whether various elements are members of the set. Though the + filter will occasionally return a false positive, it will never return a false negative. When creating + the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + +
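A short usage sketch (not from the original source), under the assumption that this is the org.apache.hadoop.util.bloom.BloomFilter shipped with the release, constructed from a bit-vector size, a hash count, and a hash type from org.apache.hadoop.util.hash.Hash:

    import org.apache.hadoop.util.bloom.BloomFilter;
    import org.apache.hadoop.util.bloom.Key;
    import org.apache.hadoop.util.hash.Hash;

    public class BloomSketch {
      public static void main(String[] args) throws Exception {
        // 64Ki bits, 4 hash functions, Murmur hashing.
        BloomFilter filter = new BloomFilter(64 * 1024, 4, Hash.MURMUR_HASH);
        filter.add(new Key("alpha".getBytes("UTF-8")));
        filter.add(new Key("beta".getBytes("UTF-8")));

        // Never a false negative; occasionally a false positive.
        System.out.println(filter.membershipTest(new Key("alpha".getBytes("UTF-8")))); // true
        System.out.println(filter.membershipTest(new Key("gamma".getBytes("UTF-8")))); // usually false
      }
    }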

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]> + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this counting Bloom filter. +

    + Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + @param key The key to remove.]]> + + + + + + + + + + + + key -> count map. +

    NOTE: due to the bucket size of this filter, inserting the same + key more than 15 times will cause an overflow at all filter positions + associated with this key, and it will significantly increase the error + rate for this and other keys. For this reason the filter can only be + used to store small count values 0 <= N << 15. + @param key key to be tested + @return 0 if the key is not present. Otherwise, a positive value v will + be returned such that v == count with probability equal to the + error rate of this filter, and v > count otherwise. + Additionally, if the filter experienced an underflow as a result of + {@link #delete(Key)} operation, the return value may be lower than the + count with the probability of the false negative rate of such + filter.]]> + + + + + + + + + + + + + + + + + + + + + + counting Bloom filter, as defined by Fan et al. in a ToN + 2000 paper. +

    + A counting Bloom filter is an improvement to standard a Bloom filter as it + allows dynamic additions and deletions of set membership information. This + is achieved through the use of a counting vector instead of a bit vector. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Summary cache: a scalable wide-area web cache sharing protocol]]> + + + + + + + + + + + + + + Builds an empty Dynamic Bloom filter. + @param vectorSize The number of bits in the vector. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}). + @param nr The threshold for the maximum number of keys to record in a + dynamic Bloom filter row.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + dynamic Bloom filter, as defined in the INFOCOM 2006 paper. +

    + A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but + each of the s rows is a standard Bloom filter. The creation + process of a DBF is iterative. At the start, the DBF is a 1 * m + bit matrix, i.e., it is composed of a single standard Bloom filter. + It assumes that nr elements are recorded in the + initial bit vector, where nr <= n (n is + the cardinality of the set A to record in the filter). +

    + As the size of A grows during the execution of the application, + several keys must be inserted in the DBF. When inserting a key into the DBF, + one must first get an active Bloom filter in the matrix. A Bloom filter is + active when the number of recorded keys, nr, is + strictly less than the current cardinality of A, n. + If an active Bloom filter is found, the key is inserted and + nr is incremented by one. On the other hand, if there + is no active Bloom filter, a new one is created (i.e., a new row is added to + the matrix) according to the current size of A and the element + is added in this new Bloom filter and the nr value of + this new Bloom filter is set to one. A given key is said to belong to the + DBF if the k positions are set to one in one of the matrix rows. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + + @see Theory and Network Applications of Dynamic Bloom Filters]]> + + + + + + + + + + + this filter. + @param nbHash The number of hash functions to consider. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + + this filter. + @param key The key to add.]]> + + + + + + this filter. + @param key The key to test. + @return boolean True if the specified key belongs to this filter. + False otherwise.]]> + + + + + + this filter and a specified filter. +

    + Invariant: The result is assigned to this filter. + @param filter The filter to AND with.]]> + + + + + + this filter and a specified filter. +

    + Invariant: The result is assigned to this filter. + @param filter The filter to OR with.]]> + + + + + + this filter and a specified filter. +

    + Invariant: The result is assigned to this filter. + @param filter The filter to XOR with.]]> + + + + + this filter. +

    + The result is assigned to this filter.]]> + + + + + + this filter. + @param keys The list of keys.]]> + + + + + + this filter. + @param keys The collection of keys.]]> + + + + + + this filter. + @param keys The array of keys.]]> + + + + + + + + + + + + + this filter.]]> + + + + + + + + + + + + + + + + + + + + A filter is a data structure which aims at offering a lossy summary of a set A. The + key idea is to map entries of A (also called keys) into several positions + in a vector through the use of several hash functions. +

    + Typically, a filter will be implemented as a Bloom filter (or a Bloom filter extension). +

    + It must be extended in order to define the real behavior. + + @see Key The general behavior of a key + @see HashFunction A hash function]]> + + + + + + + + + Builds a hash function that must obey to a given maximum number of returned values and a highest value. + @param maxValue The maximum highest returned value. + @param nbHash The number of resulting hashed values. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + this hash function. A NOOP]]> + + + + + + + + + + + + + + + + + + + + + + + + + Builds a key with a default weight. + @param value The byte value of this key.]]> + + + + + + Builds a key with a specified weight. + @param value The value of this key. + @param weight The weight associated to this key.]]> + + + + + + + + + + + + this key.]]> + + + + + this key.]]> + + + + + + this key with a specified value. + @param weight The increment.]]> + + + + + this key by one.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The idea is to randomly select a bit to reset.]]> + + + + + + The idea is to select the bit to reset that will generate the minimum + number of false negative.]]> + + + + + + The idea is to select the bit to reset that will remove the maximum number + of false positive.]]> + + + + + + The idea is to select the bit to reset that will, at the same time, remove + the maximum number of false positve while minimizing the amount of false + negative generated.]]> + + + + + Originally created by + European Commission One-Lab Project 034819.]]> + + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this retouched Bloom filter. +

    + Invariant: if the false positive is null, nothing happens. + @param key The false positive key to add.]]> + + + + + + this retouched Bloom filter. + @param coll The collection of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The list of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The array of false positive.]]> + + + + + + + this retouched Bloom filter. + @param scheme The selective clearing scheme to apply.]]> + + + + + + + + + + + + retouched Bloom filter, as defined in the CoNEXT 2006 paper. +

    + It allows the removal of selected false positives at the cost of introducing + random false negatives, and with the benefit of eliminating some random false + positives at the same time. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + @see RemoveScheme The different selective clearing algorithms + + @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + length, and + the provided seed value + @param bytes input bytes + @param length length of the valid bytes to consider + @param initval seed value + @return hash value]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The best hash table sizes are powers of 2. There is no need to do mod + a prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask. + For example, if you need only 10 bits, do + h = (h & hashmask(10)); + In which case, the hash table should have hashsize(10) elements. + +

    If you are hashing n strings byte[][] k, do it like this: + for (int i = 0, h = 0; i < n; ++i) h = hash( k[i], h); + +

    By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this + code any way you wish, private, educational, or commercial. It's free. + +

    Use for hash table lookup, or anything where one collision in 2^^32 is + acceptable. Do NOT use for cryptographic purposes.]]> + + + + + + + + + + + lookup3.c, by Bob Jenkins, May 2006, Public Domain. + + You can use this free for any purpose. It's in the public domain. + It has no warranty. + + + @see lookup3.c + @see Hash Functions (and how this + function compares to others such as CRC, MD?, etc + @see Has update on the + Dr. Dobbs Article]]> + + + + + + + + + + + + + + + + The C version of MurmurHash 2.0 found at that site was ported + to Java by Andrzej Bialecki (ab at getopt org).

    ]]> +
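    + A minimal sketch of the iterative-hashing idiom quoted above, written against the pluggable org.apache.hadoop.util.hash.Hash facade; the input strings are illustrative and nothing here is prescribed by the documentation.
    +     import org.apache.hadoop.util.hash.Hash;
    +
    +     public class HashChainSketch {
    +       public static void main(String[] args) {
    +         byte[][] k = { "alpha".getBytes(), "beta".getBytes(), "gamma".getBytes() };
    +         Hash hasher = Hash.getInstance(Hash.JENKINS_HASH);   // Hash.MURMUR_HASH also works
    +
    +         int h = 0;
    +         for (int i = 0; i < k.length; i++) {
    +           h = hasher.hash(k[i], h);   // fold each byte array into the running hash value
    +         }
    +         System.out.printf("combined hash = 0x%08x%n", h);
    +       }
    +     }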
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker, + as {@link JobTracker.State} + + @return the current state of the JobTracker.]]> + + + + + JobTracker + + @return the size of heap memory used by the JobTracker]]> + + + + + JobTracker + + @return the configured size of max heap memory that can be used by the JobTracker]]> + + + + + + + + + + + + ClusterStatus provides clients with information such as: +
      1. + Size of the cluster.
      2. + Name of the trackers.
      3. + Task capacity of the cluster.
      4. + The number of currently running map & reduce tasks.
      5. + State of the JobTracker.

    + +

    Clients can query for the latest ClusterStatus, via + {@link JobClient#getClusterStatus()}.

    + + @see JobClient]]> +
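    + A minimal sketch, assuming a reachable cluster configured in the JobConf, of querying the fields listed above through the old mapred API; it is illustrative rather than part of the documentation.
    +     import org.apache.hadoop.mapred.ClusterStatus;
    +     import org.apache.hadoop.mapred.JobClient;
    +     import org.apache.hadoop.mapred.JobConf;
    +
    +     public class ClusterStatusSketch {
    +       public static void main(String[] args) throws Exception {
    +         JobClient client = new JobClient(new JobConf());
    +         ClusterStatus status = client.getClusterStatus();
    +
    +         System.out.println("task trackers   : " + status.getTaskTrackers());
    +         System.out.println("running maps    : " + status.getMapTasks());
    +         System.out.println("running reduces : " + status.getReduceTasks());
    +         System.out.println("map capacity    : " + status.getMaxMapTasks());
    +         System.out.println("reduce capacity : " + status.getMaxReduceTasks());
    +       }
    +     }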
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter can be of + any {@link Enum} type.

    + +

    Counters are bunched into {@link Group}s, each comprising of + counters from a particular Enum class. + @deprecated Use {@link org.apache.hadoop.mapreduce.Counters} instead.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Group of counters, comprising of counters from a particular + counter {@link Enum} class. + +

    Group handles localization of the class name and the + counter names.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param fs the file system that the file is on + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobConf, int)}. + Subclasses of FileInputFormat can also override the + {@link #isSplitable(FileSystem, Path)} method to ensure input-files are + not split-up and are processed as a whole by {@link Mapper}s. + @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat} + instead.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

    Note: The following is valid only if the {@link OutputCommitter} + is {@link FileOutputCommitter}. If OutputCommitter is not + a FileOutputCommitter, the task's temporary output + directory is the same as {@link #getOutputPath(JobConf)}, i.e. + ${mapred.output.dir}.

    + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapred.output.dir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapred.output.dir}/_temporary/_${taskid} (only) + are promoted to ${mapred.output.dir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

    The application-writer can take advantage of this by creating any + side-files required in ${mapred.work.output.dir} during execution + of the reduce-task, i.e. via {@link #getWorkOutputPath(JobConf)}, and the + framework will move them out similarly - thus the application-writer doesn't have to pick + unique paths per task-attempt.

    + +

    Note: the value of ${mapred.work.output.dir} during + execution of a particular task-attempt is actually + ${mapred.output.dir}/_temporary/_{$taskid}, and this value is + set by the map-reduce framework. So, just create any side-files in the + path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce + task to take advantage of this feature.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
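    + A minimal sketch of the side-file pattern described above; the file name is hypothetical and the snippet only assumes the getWorkOutputPath(JobConf) accessor documented here.
    +     import org.apache.hadoop.fs.FSDataOutputStream;
    +     import org.apache.hadoop.fs.FileSystem;
    +     import org.apache.hadoop.fs.Path;
    +     import org.apache.hadoop.mapred.FileOutputFormat;
    +     import org.apache.hadoop.mapred.JobConf;
    +
    +     public class SideFileSketch {
    +       public static void writeSideFile(JobConf job) throws java.io.IOException {
    +         // At task runtime this resolves to ${mapred.output.dir}/_temporary/_${taskid}
    +         Path workDir = FileOutputFormat.getWorkOutputPath(job);
    +         Path sideFile = new Path(workDir, "side-data.txt");   // hypothetical side-file name
    +
    +         FileSystem fs = sideFile.getFileSystem(job);
    +         FSDataOutputStream out = fs.create(sideFile);
    +         out.writeUTF("auxiliary output");                     // promoted on task success
    +         out.close();
    +       }
    +     }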
    +
    + + + + + + + + + + + + + The generated name can be used to create custom files from within the + different tasks for the job, the names for different tasks will not collide + with each other.

    + +

    The given name is postfixed with the task type, 'm' for maps, 'r' for + reduces, and the task partition number. For example, given a name 'test' + running on the first map of the job, the generated name will be + 'test-m-00000'.

    + + @param conf the configuration for the job. + @param name the name to make unique. + @return a unique name across all tasks of the job.]]> +
    +
    + + + + + The path can be used to create custom files from within the map and + reduce tasks. The path name will be unique for each task. The path parent + will be the job output directory.

    + +

    This method uses the {@link #getUniqueName} method to make the file name + unique for the task.

    + + @param conf the configuration for the job. + @param name the name for the file. + @return a unique path across all tasks of the job.]]> +
    +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

    Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For e.g. a split could + be <input-file-path, start, offset> tuple. + + @param job job configuration. + @param numSplits the desired number of splits, a hint. + @return an array of {@link InputSplit}s for the job.]]> + + + + + + + + + It is the responsibility of the RecordReader to respect + record boundaries while processing the logical split to present a + record-oriented view to the individual task.

    + + @param split the {@link InputSplit} + @param job the job that this split belongs to + @return a {@link RecordReader}]]> +
    +
    + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
    1. + Validate the input-specification of the job.
    2. + Split-up the input file(s) into logical {@link InputSplit}s, each of which is then assigned to an individual {@link Mapper}.
    3. + Provide the {@link RecordReader} implementation to be used to glean input records from the logical InputSplit for processing by the {@link Mapper}.
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

    Clearly, logical splits based on input-size are insufficient for many + applications since record boundaries are to be respected. In such cases, the + application has to also implement a {@link RecordReader} on whom lies the + responsibility to respect record-boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see JobClient + @see FileInputFormat + @deprecated Use {@link org.apache.hadoop.mapreduce.InputFormat} instead.]]> + + + + + + + + + + InputSplit. + + @return the number of bytes in the input split. + @throws IOException]]> + + + + + + InputSplit is + located as an array of Strings. + @throws IOException]]> + + + + InputSplit represents the data to be processed by an + individual {@link Mapper}. + +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader + @deprecated Use {@link org.apache.hadoop.mapreduce.InputSplit} instead.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + jobid doesn't correspond to any known job. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient is the primary interface for the user-job to interact + with the {@link JobTracker}. + + JobClient provides facilities to submit jobs, track their + progress, access component-tasks' reports/logs, get the Map-Reduce cluster + status information etc. + +

    The job submission process involves: +

      +
    1. + Checking the input and output specifications of the job.
    2. + Computing the {@link InputSplit}s for the job.
    3. + Setting up the requisite accounting information for the {@link DistributedCache} of the job, if necessary.
    4. + Copying the job's jar and configuration to the map-reduce system directory on the distributed file-system.
    5. + Submitting the job to the JobTracker and optionally monitoring its status.

    + + Normally the user creates the application, describes various facets of the + job via {@link JobConf} and then uses the JobClient to submit + the job and monitor its progress. + +

    Here is an example on how to use JobClient:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     job.setInputPath(new Path("in"));
    +     job.setOutputPath(new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +
    +     // Submit the job, then poll for progress until the job is complete
    +     JobClient.runJob(job);
    + 

    + +

    Job Control

    + +

    At times clients would chain map-reduce jobs to accomplish complex tasks + which cannot be done via a single map-reduce job. This is fairly easy since + the output of the job, typically, goes to distributed file-system and that + can be used as the input for the next job.

    + +

    However, this also means that the onus on ensuring jobs are complete + (success/failure) lies squarely on the clients. In such situations the + various job-control options are: +

      +
    1. + {@link #runJob(JobConf)} : submits the job and returns only after the job has completed.
    2. + {@link #submitJob(JobConf)} : only submits the job, then polls the returned handle to the {@link RunningJob} to query status and make scheduling decisions (a sketch of this option follows this class description).
    3. + {@link JobConf#setJobEndNotificationURI(String)} : sets up a notification on job-completion, thus avoiding polling.

    + + @see JobConf + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
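    + A minimal sketch of the second job-control option above (submit, then poll the RunningJob handle); the job parameters are placeholders and the polling interval is arbitrary.
    +     import org.apache.hadoop.mapred.JobClient;
    +     import org.apache.hadoop.mapred.JobConf;
    +     import org.apache.hadoop.mapred.RunningJob;
    +
    +     public class SubmitAndPollSketch {
    +       public static void main(String[] args) throws Exception {
    +         JobConf job = new JobConf();
    +         job.setJobName("myjob");                       // plus mapper/reducer/paths as usual
    +
    +         JobClient client = new JobClient(job);
    +         RunningJob running = client.submitJob(job);    // returns immediately
    +
    +         while (!running.isComplete()) {                // poll instead of blocking in runJob()
    +           System.out.printf("map %.0f%% reduce %.0f%%%n",
    +               running.mapProgress() * 100, running.reduceProgress() * 100);
    +           Thread.sleep(5000);
    +         }
    +         System.out.println(running.isSuccessful() ? "succeeded" : "failed");
    +       }
    +     }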
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If the parameter {@code loadDefaults} is false, the new instance + will not load resources from the default files. + + @param loadDefaults specifies whether to load from the default files]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if framework should keep the intermediate files + for failed tasks, false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the outputs of the maps are to be compressed, + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This comparator should be provided if the equivalence rules for keys + for sorting the intermediates are different from those for grouping keys + before each call to + {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.

    + +

    For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed + in a single call to the reduce function if K1 and K2 compare as equal.

    + +

    Since {@link #setOutputKeyComparatorClass(Class)} can be used to control + how keys are sorted, this can be used in conjunction to simulate + secondary sort on values.

    + +

    Note: This is not a guarantee of the reduce sort being + stable in any sense. (In any case, with the order of available + map-outputs to the reduce being non-deterministic, it wouldn't make + that much sense.)

    + + @param theClass the comparator class to be used for grouping keys. + It should implement RawComparator. + @see #setOutputKeyComparatorClass(Class)]]> +
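    + A minimal sketch of wiring a grouping comparator next to the sort comparator to simulate a secondary sort, as described above; both comparator classes and the tab-separated key layout are hypothetical stand-ins, not Hadoop classes.
    +     import org.apache.hadoop.io.Text;
    +     import org.apache.hadoop.io.WritableComparable;
    +     import org.apache.hadoop.io.WritableComparator;
    +     import org.apache.hadoop.mapred.JobConf;
    +
    +     public class SecondarySortWiring {
    +       /** Hypothetical sort comparator: orders on the full composite key text. */
    +       public static class FullKeyComparator extends WritableComparator {
    +         public FullKeyComparator() { super(Text.class, true); }
    +       }
    +
    +       /** Hypothetical grouping comparator: compares only the first tab-separated field. */
    +       public static class FirstFieldComparator extends WritableComparator {
    +         public FirstFieldComparator() { super(Text.class, true); }
    +         @Override
    +         public int compare(WritableComparable a, WritableComparable b) {
    +           String fa = a.toString().split("\t", 2)[0];
    +           String fb = b.toString().split("\t", 2)[0];
    +           return fa.compareTo(fb);
    +         }
    +       }
    +
    +       public static void configure(JobConf job) {
    +         job.setOutputKeyComparatorClass(FullKeyComparator.class);         // controls the sort
    +         job.setOutputValueGroupingComparator(FirstFieldComparator.class); // controls grouping
    +       }
    +     }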
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. Typically the combiner is same as the + the {@link Reducer} for the job i.e. {@link #getReducerClass()}. + + @return the user-defined combiner class used to combine map-outputs.]]> + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. + +

    The combiner is an application-specified aggregation operation, which + can help cut down the amount of data transferred between the + {@link Mapper} and the {@link Reducer}, leading to better performance.

    + +

    The framework may invoke the combiner 0, 1, or multiple times, in both + the mapper and reducer tasks. In general, the combiner is called as the + sort/merge result is written to disk. The combiner must: +

      +
    • be side-effect free
    • have the same input and output key types and the same input and output value types

    + +

    Typically the combiner is the same as the Reducer for the + job, i.e. {@link #setReducerClass(Class)}.

    + + @param theClass the user-defined combiner class used to combine + map-outputs.]]> +
    +
    + + + true. + + @return true if speculative execution be used for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on, else false.]]> + + + + + true. + + @return true if speculative execution be + used for this job for map tasks, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for map tasks, + else false.]]> + + + + + true. + + @return true if speculative execution be used + for reduce tasks for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for reduce tasks, + else false.]]> + + + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + Note: This is only a hint to the framework. The actual + number of spawned map tasks depends on the number of {@link InputSplit}s + generated by the job's {@link InputFormat#getSplits(JobConf, int)}. + + A custom {@link InputFormat} is typically used to accurately control + the number of map tasks for the job.

    + +

    How many maps?

    + +

    The number of maps is usually driven by the total size of the inputs + i.e. total number of blocks of the input files.

    + +

    The right level of parallelism for maps seems to be around 10-100 maps + per-node, although it has been set up to 300 or so for very cpu-light map + tasks. Task setup takes a while, so it is best if the maps take at least a + minute to execute.

    + +

    The default behavior of file-based {@link InputFormat}s is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of input files. However, the {@link FileSystem} blocksize of the + input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

    Thus, if you expect 10TB of input data and have a blocksize of 128MB, + you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is + used to set it even higher.

    + + @param n the number of map tasks for this job. + @see InputFormat#getSplits(JobConf, int) + @see FileInputFormat + @see FileSystem#getDefaultBlockSize() + @see FileStatus#getBlockSize()]]> +
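    + A minimal sketch of the arithmetic behind the 10TB example above; the figures are the ones quoted in the text, nothing else is assumed.
    +     public class MapCountSketch {
    +       public static void main(String[] args) {
    +         long inputBytes = 10L * 1024 * 1024 * 1024 * 1024;    // 10 TB of input
    +         long blockBytes = 128L * 1024 * 1024;                 // 128 MB block size
    +         System.out.println(inputBytes / blockBytes);          // 81920, i.e. roughly 82,000 maps
    +       }
    +     }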
    +
    + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + How many reduces? + +

    The right number of reduces seems to be 0.95 or + 1.75 multiplied by (<no. of nodes> * + + mapred.tasktracker.reduce.tasks.maximum). +

    + +

    With 0.95 all of the reduces can launch immediately and + start transferring map outputs as the maps finish. With 1.75 + the faster nodes will finish their first round of reduces and launch a + second wave of reduces doing a much better job of load balancing.

    + +

    Increasing the number of reduces increases the framework overhead, but + increases load balancing and lowers the cost of failures.

    + +

    The scaling factors above are slightly less than whole numbers to + reserve a few reduce slots in the framework for speculative-tasks, failures + etc.

    + +

    Reducer NONE

    + +

    It is legal to set the number of reduce-tasks to zero.

    + +

    In this case the output of the map-tasks goes directly to the distributed + file-system, to the path set by + {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the + framework doesn't sort the map-outputs before writing them out to HDFS.

    + + @param n the number of reduce tasks for this job.]]> +
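    + A minimal sketch applying the 0.95 / 1.75 heuristics above; the 10-node cluster with 2 reduce slots per node is a hypothetical example.
    +     import org.apache.hadoop.mapred.JobConf;
    +
    +     public class ReduceCountSketch {
    +       public static void main(String[] args) {
    +         int nodes = 10, reduceSlotsPerNode = 2;                        // hypothetical cluster
    +         int singleWave = (int) (0.95 * nodes * reduceSlotsPerNode);    // 19: one wave of reduces
    +         int twoWaves   = (int) (1.75 * nodes * reduceSlotsPerNode);    // 35: faster nodes run a second wave
    +
    +         JobConf job = new JobConf();
    +         job.setNumReduceTasks(singleWave);
    +         System.out.println("reduces: " + job.getNumReduceTasks() + " (two-wave option: " + twoWaves + ")");
    +       }
    +     }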
    +
    + + + mapred.map.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per map task.]]> + + + + + + + + + + + mapred.reduce.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per reduce task.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + noFailures, the + tasktracker is blacklisted for this job. + + @param noFailures maximum no. of failures of a given job per tasktracker.]]> + + + + + blacklisted for this job. + + @return the maximum no. of failures of a given job per tasktracker.]]> + + + + + failed. + + Defaults to zero, i.e. any failed map-task results in + the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + failed. + + Defaults to zero, i.e. any failed reduce-task results + in the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The debug script can aid debugging of failed map tasks. The script is + given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the map failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setMapDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param mDbgScript the script name]]> +
    +
    + + + + + + + + + The debug script can aid debugging of failed reduce tasks. The script + is given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the reduce failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf. +

    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script file needs to be symlinked

    + +

    Here is an example on how to submit a script +

    + job.setReduceDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 

    + + @param rDbgScript the script name]]> +
    +
    + + + + + + + + null if it hasn't + been set. + @see #setJobEndNotificationURI(String)]]> + + + + + + The uri can contain 2 special parameters: $jobId and + $jobStatus. Those, if present, are replaced by the job's + identifier and completion-status respectively.

    + +

    This is typically used by application-writers to implement chaining of + Map-Reduce jobs in an asynchronous manner.

    + + @param uri the job end notification uri + @see JobStatus + @see Job Completion and Chaining]]> +
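    + A minimal sketch of registering a job-end notification so chained jobs can be triggered without polling; the callback URL is hypothetical.
    +     import org.apache.hadoop.mapred.JobConf;
    +
    +     public class NotificationSketch {
    +       public static void main(String[] args) {
    +         JobConf job = new JobConf();
    +         // $jobId and $jobStatus are substituted by the framework at notification time
    +         job.setJobEndNotificationURI(
    +             "http://workflow.example.com/notify?jobid=$jobId&status=$jobStatus");
    +       }
    +     }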
    +
    + + + + When a job starts, a shared directory is created at location + + ${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ . + This directory is exposed to the users through + job.local.dir . + So, the tasks can use this space + as scratch space and share files among them.

    + This value is available as System property also. + + @return The localized job specific shared directory]]> +
    +
    + + + + For backward compatibility, if the job configuration sets the + key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different + from {@link #DISABLED_MEMORY_LIMIT}, that value will be used + after converting it from bytes to MB. + @return memory required to run a map task of the job, in MB, + or {@link #DISABLED_MEMORY_LIMIT} if unset.]]> + + + + + + + + + For backward compatibility, if the job configuration sets the + key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different + from {@link #DISABLED_MEMORY_LIMIT}, that value will be used + after converting it from bytes to MB. + @return memory required to run a reduce task of the job, in MB, + or {@link #DISABLED_MEMORY_LIMIT} if unset.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + This method is deprecated. Now, different memory limits can be + set for map and reduce tasks of a job, in MB. +

    + For backward compatibility, if the job configuration sets the + key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different + from {@link #DISABLED_MEMORY_LIMIT}, that value is returned. + Otherwise, this method will return the larger of the values returned by + {@link #getMemoryForMapTask()} and {@link #getMemoryForReduceTask()} + after converting them into bytes. + + @return Memory required to run a task of this job, in bytes, + or {@link #DISABLED_MEMORY_LIMIT}, if unset. + @see #setMaxVirtualMemoryForTask(long) + @deprecated Use {@link #getMemoryForMapTask()} and + {@link #getMemoryForReduceTask()}]]> + + + + + + + mapred.task.maxvmem is split into + mapred.job.map.memory.mb + and mapred.job.map.memory.mb,mapred + each of the new key are set + as mapred.task.maxvmem / 1024 + as new values are in MB + + @param vmem Maximum amount of virtual memory in bytes any task of this job + can use. + @see #getMaxVirtualMemoryForTask() + @deprecated + Use {@link #setMemoryForMapTask(long mem)} and + Use {@link #setMemoryForReduceTask(long mem)}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobConf is the primary interface for a user to describe a + map-reduce job to the Hadoop framework for execution. The framework tries to + faithfully execute the job as-is described by JobConf, however: +

      +
    1. + Some configuration parameters might have been marked as final by administrators and hence cannot be altered.
    2. + While some job parameters are straightforward to set (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly with the rest of the framework and/or job-configuration and are relatively more complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}).

    + +

    JobConf typically specifies the {@link Mapper}, combiner + (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and + {@link OutputFormat} implementations to be used etc. + +

    Optionally JobConf is used to specify other advanced facets + of the job such as Comparators to be used, files to be put in + the {@link DistributedCache}, whether or not intermediate and/or job outputs + are to be compressed (and how), and debuggability via user-provided scripts + ({@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}) + for doing post-processing on task logs, task's stdout, stderr, and syslog.

    + +

    Here is an example on how to configure a job via JobConf:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     FileInputFormat.setInputPaths(job, new Path("in"));
    +     FileOutputFormat.setOutputPath(job, new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setCombinerClass(MyJob.MyReducer.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +     
    +     job.setInputFormat(SequenceFileInputFormat.class);
    +     job.setOutputFormat(SequenceFileOutputFormat.class);
    + 

    + + @see JobClient + @see ClusterStatus + @see Tool + @see DistributedCache + @deprecated Use {@link Configuration} instead]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + any job + run on the jobtracker started at 200707121733, we would use : +
     
    + JobID.getTaskIDsPattern("200707121733", null);
    + 
    + which will return : +
     "job_200707121733_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @return a regex pattern matching JobIDs]]> +
    +
    + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "N/A" + + @return Scheduling information associated to particular Job Queue]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + zero. + + @param conf configuration for the JobTracker. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Output pairs need not be of the same types as input pairs. A given + input pair may map to zero or many output pairs. Output pairs are + collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the input key. + @param value the input value. + @param output collects mapped keys and values. + @param reporter facility to report progress.]]> +
    + + + Maps are the individual tasks which transform input records into a + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link JobConf} for the + job via the {@link JobConfigurable#configure(JobConf)} and initialize + themselves. Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    The framework then calls + {@link #map(Object, Object, OutputCollector, Reporter)} + for each key/value pair in the InputSplit for that task.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the grouping by specifying + a Comparator via + {@link JobConf#setOutputKeyComparatorClass(Class)}.

    + +

    The grouped Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    The intermediate, grouped outputs are always stored in + {@link SequenceFile}s. Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the JobConf.

    + +

    If the job has + zero + reduces then the output of the Mapper is directly written + to the {@link FileSystem} without grouping by keys.

    + +

    Example:

    +

    +     public class MyMapper<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Mapper<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +       
    +       private String mapTaskId;
    +       private String inputFile;
    +       private int noRecords = 0;
    +       
    +       public void configure(JobConf job) {
    +         mapTaskId = job.get("mapred.task.id");
    +         inputFile = job.get("map.input.file");
    +       }
    +       
    +       public void map(K key, V val,
    +                       OutputCollector<K, V> output, Reporter reporter)
    +       throws IOException {
    +         // Process the <key, value> pair (assume this takes a while)
    +         // ...
    +         // ...
    +         
    +         // Let the framework know that we are alive, and kicking!
    +         // reporter.progress();
    +         
    +         // Process some more
    +         // ...
    +         // ...
    +         
    +         // Increment the no. of <key, value> pairs processed
    +         ++noRecords;
    +
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +        
    +         // Every 100 records update application-level status
    +         if ((noRecords%100) == 0) {
    +           reporter.setStatus(mapTaskId + " processed " + noRecords + 
    +                              " from input-file: " + inputFile); 
    +         }
    +         
    +         // Output the result
    +         output.collect(key, val);
    +       }
    +     }
    + 

    + +

    Applications may write a custom {@link MapRunnable} to exert greater + control on map processing e.g. multi-threaded Mappers etc.

    + + @see JobConf + @see InputFormat + @see Partitioner + @see Reducer + @see MapReduceBase + @see MapRunnable + @see SequenceFile + @deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + Provides default no-op implementations for a few methods, most non-trivial + applications need to override some of them.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + <key, value> pairs. + +

    Mapping of input records to output records is complete when this method + returns.

    + + @param input the {@link RecordReader} to read the input records. + @param output the {@link OutputCollector} to collect the outputrecords. + @param reporter {@link Reporter} to report progress, status-updates etc. + @throws IOException]]> +
    +
    + + Custom implementations of MapRunnable can exert greater + control on map processing e.g. multi-threaded, asynchronous mappers etc.

    + + @see Mapper + @deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nearly + equal content length.
    + Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} + to construct RecordReader's for MultiFileSplit's. + @see MultiFileSplit + @deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} instead]]> +
    +
    + + + + + + + + + + + + + MultiFileSplit can be used to implement {@link RecordReader}'s, with + reading one record per file. + @see FileSplit + @see MultiFileInputFormat + @deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileSplit} instead]]> + + + + + + + + + + + + + + + <key, value> pairs output by {@link Mapper}s + and {@link Reducer}s. + +

    OutputCollector is the generalization of the facility + provided by the Map-Reduce framework to collect data output by either the + Mapper or the Reducer i.e. intermediate outputs + or the output of the job.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OutputCommitter describes the commit of task output for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputCommitter of + the job to:

    +

      +
    1. + Setup the job during initialization. For example, create the temporary output directory for the job during the initialization of the job.
    2. + Cleanup the job after the job completion. For example, remove the temporary output directory after the job completion.
    3. + Setup the task temporary output.
    4. + Check whether a task needs a commit. This is to avoid the commit procedure if a task does not need commit.
    5. + Commit of the task output.
    6. + Discard the task commit.
    + + @see FileOutputCommitter + @see JobContext + @see TaskAttemptContext + @deprecated Use {@link org.apache.hadoop.mapreduce.OutputCommitter} instead.]]> +
    +
    + + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when it is + a job is submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + @param ignored + @param job job configuration. + @throws IOException when output should not be attempted]]> +
    +
    + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
    1. + Validate the output-specification of the job. For e.g. check that the output directory doesn't already exist.
    2. + Provide the {@link RecordWriter} implementation to be used to write out the output files of the job. Output files are stored in a {@link FileSystem}.
    + + @see RecordWriter + @see JobConf + @deprecated Use {@link org.apache.hadoop.mapreduce.OutputFormat} instead.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + Typically a hash function on a all or a subset of the key.

    + + @param key the key to be partitioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + + @see Reducer + @deprecated Use {@link org.apache.hadoop.mapreduce.Partitioner} instead.]]> +
    +
    + + + + + + + + + + + + + + + + + + + true if there exists a key/value, + false otherwise. + @throws IOException]]> + + + + + + + + + + + + + + + RawKeyValueIterator is an iterator used to iterate over + the raw keys and values during sort/merge of intermediate data.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.0 to 1.0. + @throws IOException]]> + + + + RecordReader reads <key, value> pairs from an + {@link InputSplit}. + +

    RecordReader, typically, converts the byte-oriented view of + the input, provided by the InputSplit, and presents a + record-oriented view for the {@link Mapper} & {@link Reducer} tasks for + processing. It thus assumes the responsibility of processing record + boundaries and presenting the tasks with keys and values.

    + + @see InputSplit + @see InputFormat]]> +
    +
    + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param reporter facility to report progress. + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + Reduces values for a given key. + +

    The framework calls this method for each + <key, (list of values)> pair in the grouped inputs. + Output values must be of the same type as input values. Input keys must + not be altered. The framework will reuse the key and value objects + that are passed into the reduce, therefore the application should clone + the objects they want to keep a copy of. In many cases, all values are + combined into zero or one value. +

    + +

    Output pairs are collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes an insignificant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapred.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the key. + @param values the list of values to reduce. + @param output to collect keys and combined values. + @param reporter facility to report progress.]]> +
    + + + The number of Reducers for the job is set by the user via + {@link JobConf#setNumReduceTasks(int)}. Reducer implementations + can access the {@link JobConf} for the job via the + {@link JobConfigurable#configure(JobConf)} method and initialize themselves. + Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    Reducer has 3 primary phases:

    +
      + 1. Shuffle
      + Reducer is input the grouped output of a {@link Mapper}. In this phase the framework, for each Reducer, fetches the relevant partition of the output of all the Mappers, via HTTP.
      + 2. Sort
      + The framework groups Reducer inputs by keys (since different Mappers may have output the same key) in this stage.
      + The shuffle and sort phases occur simultaneously, i.e. while outputs are being fetched they are merged.
      + SecondarySort
      + If equivalence rules for keys while grouping the intermediates are different from those for grouping keys before reduction, then one may specify a Comparator via {@link JobConf#setOutputValueGroupingComparator(Class)}. Since {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to control how intermediate keys are grouped, these can be used in conjunction to simulate secondary sort on values.
      + For example, say that you want to find duplicate web pages and tag them all with the url of the "best" known example. You would set up the job like:
      +   • Map Input Key: url
      +   • Map Input Value: document
      +   • Map Output Key: document checksum, url pagerank
      +   • Map Output Value: url
      +   • Partitioner: by checksum
      +   • OutputKeyComparator: by checksum and then decreasing pagerank
      +   • OutputValueGroupingComparator: by checksum
      + 3. Reduce
      + In this phase the {@link #reduce(Object, Iterator, OutputCollector, Reporter)} method is called for each <key, (list of values)> pair in the grouped inputs.
      + The output of the reduce task is typically written to the {@link FileSystem} via {@link OutputCollector#collect(Object, Object)}.

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

    +     public class MyReducer<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Reducer<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +        
    +       private String reduceTaskId;
    +       private int noKeys = 0;
    +       
    +       public void configure(JobConf job) {
    +         reduceTaskId = job.get("mapred.task.id");
    +       }
    +       
    +       public void reduce(K key, Iterator<V> values,
    +                          OutputCollector<K, V> output, 
    +                          Reporter reporter)
    +       throws IOException {
    +       
    +         // Process
    +         int noValues = 0;
    +         while (values.hasNext()) {
    +           V value = values.next();
    +           
    +           // Increment the no. of values for this key
    +           ++noValues;
    +           
    +           // Process the <key, value> pair (assume this takes a while)
    +           // ...
    +           // ...
    +           
    +           // Let the framework know that we are alive, and kicking!
    +           if ((noValues%10) == 0) {
    +             reporter.progress();
    +           }
    +         
    +           // Process some more
    +           // ...
    +           // ...
    +           
    +           // Output the <key, value> 
    +           output.collect(key, value);
    +         }
    +         
    +         // Increment the no. of <key, list of values> pairs processed
    +         ++noKeys;
    +         
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +         
    +         // Every 100 keys update application-level status
    +         if ((noKeys%100) == 0) {
    +           reporter.setStatus(reduceTaskId + " processed " + noKeys);
    +         }
    +       }
    +     }
    + 

    + + @see Mapper + @see Partitioner + @see Reporter + @see MapReduceBase + @deprecated Use {@link org.apache.hadoop.mapreduce.Reducer} instead.]]> +
    +
    + + + + + + + + + + + + + + Counter of the given group/name.]]> + + + + + + + Counter of the given group/name.]]> + + + + + + + Enum. + @param amount A non-negative amount by which the counter is to + be incremented.]]> + + + + + + + + + + + + + + InputSplit that the map is reading from. + @throws UnsupportedOperationException if called outside a mapper]]> + + + + + + + + + {@link Mapper} and {@link Reducer} can use the Reporter + provided to report progress or just indicate that they are alive. In + scenarios where the application takes an insignificant amount of time to + process individual key/value pairs, this is crucial since the framework + might assume that the task has timed-out and kill that task. + +

    Applications can also update {@link Counters} via the provided + Reporter .

    + + @see Progressable + @see Counters]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + progress of the job's cleanup-tasks, as a float between 0.0 + and 1.0. When all cleanup tasks have completed, the function returns 1.0. + + @return the progress of the job's cleanup-tasks. + @throws IOException]]> + + + + + + progress of the job's setup-tasks, as a float between 0.0 + and 1.0. When all setup tasks have completed, the function returns 1.0. + + @return the progress of the job's setup-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RunningJob is the user-interface to query for details on a + running Map-Reduce job. + +

    Clients can get hold of RunningJob via the {@link JobClient} + and then query the running-job for details such as name, configuration, + progress etc.

    + + @see JobClient]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This allows the user to specify the key class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + This allows the user to specify the value class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f. The filtering criteria is + MD5(key) % f == 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + f using + the criteria record# % f == 0. + For example, if the frequency is 10, one out of 10 records is returned.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}. + false otherwise.]]> + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}. + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop provides an optional mode of execution in which the bad records + are detected and skipped in further attempts. + +

    This feature can be used when map/reduce tasks crash deterministically on + certain input. This happens due to bugs in the map/reduce function. The usual + course would be to fix these bugs. But sometimes this is not possible; + perhaps the bug is in third-party libraries for which the source code is + not available. Due to this, the task never reaches completion even with + multiple attempts, and the complete data for that task is lost.

    + +

With this feature, only a small portion of data is lost surrounding + the bad record, which may be acceptable for some user applications. + See {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}.

    + +

Skipping mode is activated after a certain number of failures; + see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}.

    + +

In skipping mode, the map/reduce task maintains at all times the record range + that is currently being processed. Before giving the input to the + map/reduce function, it sends this record range to the TaskTracker. + If the task crashes, the TaskTracker knows which range was last reported, + and on further attempts that range is skipped.

    ]]> +
    +
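A minimal driver-side sketch of enabling skipping mode with the SkipBadRecords helpers referenced above; the thresholds are illustrative, not recommendations:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SkipBadRecords;

public class SkipModeSetup {
  static void configure(JobConf conf) {
    // Start skipping after two failed attempts of the same task.
    SkipBadRecords.setAttemptsToStartSkipping(conf, 2);
    // Accept losing at most 100 records around a bad map record,
    // and at most 100 key groups around a bad reduce group.
    SkipBadRecords.setMapperMaxSkipRecords(conf, 100L);
    SkipBadRecords.setReducerMaxSkipGroups(conf, 100L);
  }
}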
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all task attempt IDs + of any jobtracker, in any job, of the first + map task, we would use : +
     
    + TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
    + 
    + which will return : +
     "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @param attemptId the task attempt number, or null + @return a regex pattern matching TaskAttemptIDs]]> +
    +
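For illustration, the regex returned by getTaskAttemptIDsPattern above can be used with plain java.lang.String matching; the attempt string below is a made-up example:

import org.apache.hadoop.mapred.TaskAttemptID;

public class AttemptPatternDemo {
  public static void main(String[] args) {
    // Any attempt of the first map task, of any job, on any jobtracker.
    String pattern = TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
    boolean matched = "attempt_200707121733_0003_m_000001_0".matches(pattern);
    System.out.println(pattern + " -> " + matched);   // expected: true
  }
}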
    + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the first map task + of any jobtracker, of any job, we would use : +

     
    + TaskID.getTaskIDsPattern(null, null, true, 1);
    + 
    + which will return : +
     "task_[^_]*_[0-9]*_m_000001*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @return a regex pattern matching TaskIDs]]> +
    + + + + + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.log.dir.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the Job was added.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([,]*) + func ::= tbl(,"") + class ::= @see java.lang.Class#forName(java.lang.String) + path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) + } + Reads expression from the mapred.join.expr property and + user-supplied join types from mapred.join.define.<ident> + types. Paths supplied to tbl are given as input paths to the + InputFormat class listed. + @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,

    ) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + mapred.join.define.<ident> to a classname. In the expression + mapred.join.expr, the identifier will be assumed to be a + ComposableRecordReader. + mapred.join.keycomparator can be a classname used to compare keys + in the join. + @see JoinRecordReader + @see MultiFilterRecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + capacity children to position + id in the parent reader. + The id of a root CompositeRecordReader is -1 by convention, but relying + on this is not recommended.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + override(S1,S2,S3) will prefer values + from S3 over S2, and values from S2 over S1 for all keys + emitted from all sources.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [,,...,]]]> + + + + + + + out. + TupleWritable format: + {@code + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

+ For the added Mapper, the configuration given for it, + mapperConf, has precedence over the job's JobConf. This + precedence is in effect when the task is running.

+ IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper; this is done by the addMapper call for the last mapper in the chain.

+ + @param job job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overridden, super.configure(...) should be + invoked at the beginning of the overriding method.]]> + + + + + + + + + + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overridden, super.close() should be + invoked at the end of the overriding method.]]> + + + + + The Mapper classes are invoked in a chained (or piped) fashion: the output of + the first becomes the input of the second, and so on until the last Mapper; + the output of the last Mapper is written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed in a chain. This enables having + reusable specialized Mappers that can be combined to perform composite + operations within a single task. +

+ Special care has to be taken when creating chains so that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reducer in the chain use matching output and input key and + value classes, as no conversion is done by the chaining code.

+ Using the ChainMapper and the ChainReducer classes it is possible to compose + Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An + immediate benefit of this pattern is a dramatic reduction in disk IO.

+ IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper; this is done by the addMapper call for the last mapper in the chain. +

    + ChainMapper usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    + 

    + JobConf mapAConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, + Text.class, Text.class, true, mapAConf); +

    + JobConf mapBConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, mapBConf); +

    + JobConf reduceConf = new JobConf(false); + ... + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, + Text.class, Text.class, true, reduceConf); +

    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, null); +

    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, + LongWritable.class, LongWritable.class, true, null); +

    + FileInputFormat.setInputPaths(conf, inDir); + FileOutputFormat.setOutputPath(conf, outDir); + ... +

    + JobClient jc = new JobClient(conf); + RunningJob job = jc.submitJob(conf); + ... +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Reducer leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Reducer does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

+ For the added Reducer, the configuration given for it, + reducerConf, has precedence over the job's JobConf. This + precedence is in effect when the task is running.

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. + + @param job job's JobConf to add the Reducer class. + @param klass the Reducer class to add. + @param inputKeyClass reducer input key class. + @param inputValueClass reducer input value class. + @param outputKeyClass reducer output key class. + @param outputValueClass reducer output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param reducerConf a JobConf with the configuration for the Reducer + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

+ For the added Mapper, the configuration given for it, + mapperConf, has precedence over the job's JobConf. This + precedence is in effect when the task is running.

+ IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper; this is done by the addMapper call for the last mapper in the chain. + + @param job chain job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overridden, super.configure(...) should be + invoked at the beginning of the overriding method.]]> + + + + + + + + + + reduce(...) method of the Reducer with the + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overridden, super.close() should be + invoked at the end of the overriding method.]]> + + + + + For each record output by the Reducer, the Mapper classes are invoked in a + chained (or piped) fashion: the output of the first becomes the input of the + second, and so on until the last Mapper; the output of the last Mapper is + written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed after the Reducer or in a chain. + This enables having reusable specialized Mappers that can be combined to + perform composite operations within a single task. +

+ Special care has to be taken when creating chains so that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reducer in the chain use matching output and input key and + value classes, as no conversion is done by the chaining code.

+ Using the ChainMapper and the ChainReducer classes it is possible to compose + Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An + immediate benefit of this pattern is a dramatic reduction in disk IO.

+ IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer; this is done by the setReducer or the addMapper call for the last + element in the chain. +

    + ChainReducer usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    + 

    + JobConf mapAConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, + Text.class, Text.class, true, mapAConf); +

    + JobConf mapBConf = new JobConf(false); + ... + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, mapBConf); +

    + JobConf reduceConf = new JobConf(false); + ... + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, + Text.class, Text.class, true, reduceConf); +

    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, + LongWritable.class, Text.class, false, null); +

    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, + LongWritable.class, LongWritable.class, true, null); +

    + FileInputFormat.setInputPaths(conf, inDir); + FileOutputFormat.setOutputPath(conf, outDir); + ... +

    + JobClient jc = new JobClient(conf); + RunningJob job = jc.submitJob(conf); + ... +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RecordReader's for CombineFileSplit's. + @see CombineFileSplit]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + th Path]]> + + + + + + th Path]]> + + + + + + + + + + + th Path]]> + + + + + + + + + + + + + + + + + + + + + + + + + + CombineFileSplit can be used to implement {@link org.apache.hadoop.mapred.RecordReader}'s, + with reading one record per file. + @see org.apache.hadoop.mapred.FileSplit + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + @param freq The frequency with which records will be emitted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + This will read every split at the client, which is very expensive. + @param freq Probability with which a key will be chosen. + @param numSamples Total number of samples to obtain from all selected + splits.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all splits. + Takes the first numSamples / numSplits records from each split. + @param numSamples Total number of samples to obtain from all selected + splits.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the name output is multi, false + if it is single. If the name output is not defined it returns + false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @param conf job conf to add the named output + @param namedOutput named output name, it has to be a word, letters + and numbers only, cannot be the word 'part' as + that is reserved for the + default output. + @param outputFormatClass OutputFormat class. + @param keyClass key class + @param valueClass value class]]> + + + + + + + + + + + + @param conf job conf to add the named output + @param namedOutput named output name, it has to be a word, letters + and numbers only, cannot be the word 'part' as + that is reserved for the + default output. + @param outputFormatClass OutputFormat class. + @param keyClass key class + @param valueClass value class]]> + + + + + + + + By default these counters are disabled. +

+ MultipleOutputs supports counters; by default they are disabled. + The counters group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs, the name of the counter is the concatenation of the named + output, an underscore '_', and the multiname. + + @param conf job conf in which to enable or disable the counters. + @param enabled indicates if the counters will be enabled or not.]]> +
    +
    + + + + + By default these counters are disabled. +

+ MultipleOutputs supports counters; by default they are disabled. + The counters group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs, the name of the counter is the concatenation of the named + output, an underscore '_', and the multiname. + + + @param conf job conf to check. + @return TRUE if the counters are enabled, FALSE if they are disabled.]]> +
    +
    + + + + + + + + + + + + + @param namedOutput the named output name + @param reporter the reporter + @return the output collector for the given named output + @throws IOException thrown if output collector could not be created]]> + + + + + + + + + + + @param namedOutput the named output name + @param multiName the multi name part + @param reporter the reporter + @return the output collector for the given named output + @throws IOException thrown if output collector could not be created]]> + + + + + + + If overriden subclasses must invoke super.close() at the + end of their close() + + @throws java.io.IOException thrown if any of the MultipleOutput files + could not be closed properly.]]> + + + + OutputCollector passed to + the map() and reduce() methods of the + Mapper and Reducer implementations. +

    + Each additional output, or named output, may be configured with its own + OutputFormat, with its own key class and with its own value + class. +

+ A named output can be a single file or a multi file. The latter is referred to as + a multi named output. +

+ A multi named output is an unbounded set of files all sharing the same + OutputFormat, key class and value class configuration. +

+ When named outputs are used within a Mapper implementation, + key/values written to a named output are not part of the reduce phase; only + key/values written to the job OutputCollector are part of the + reduce phase. +

+ MultipleOutputs supports counters; by default they are disabled. The counters + group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs, the name of the counter is the concatenation of the named + output, an underscore '_', and the multiname. +

    + Job configuration usage pattern is: +

    +
    + JobConf conf = new JobConf();
    +
    + conf.setInputPath(inDir);
    + FileOutputFormat.setOutputPath(conf, outDir);
    +
    + conf.setMapperClass(MOMap.class);
    + conf.setReducerClass(MOReduce.class);
    + ...
    +
    + // Defines additional single text based output 'text' for the job
    + MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
    + LongWritable.class, Text.class);
    +
    + // Defines additional multi sequencefile based output 'sequence' for the
    + // job
    + MultipleOutputs.addMultiNamedOutput(conf, "seq",
    +   SequenceFileOutputFormat.class,
    +   LongWritable.class, Text.class);
    + ...
    +
    + JobClient jc = new JobClient();
    + RunningJob job = jc.submitJob(conf);
    +
    + ...
    + 
    +

+ Usage in Reducer: +

    +
    + public class MOReduce implements
+   Reducer<WritableComparable, Writable, WritableComparable, Writable> {
    + private MultipleOutputs mos;
    +
    + public void configure(JobConf conf) {
    + ...
    + mos = new MultipleOutputs(conf);
    + }
    +
    + public void reduce(WritableComparable key, Iterator<Writable> values,
    + OutputCollector output, Reporter reporter)
    + throws IOException {
    + ...
    + mos.getCollector("text", reporter).collect(key, new Text("Hello"));
    + mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye"));
    + mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau"));
    + ...
    + }
    +
    + public void close() throws IOException {
    + mos.close();
    + ...
    + }
    +
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used instead of the default implementation, + @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + bound in order to improve throughput. +

    + Map implementations using this MapRunnable must be thread-safe. +

+ The Map-Reduce job has to be configured to use this MapRunnable class (using + the JobConf.setMapRunnerClass method) and + the number of threads the thread-pool can use with the + mapred.map.multithreadedrunner.threads property; its default + value is 10 threads. +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + pairs. Uses + {@link StringTokenizer} to break text into tokens. + @deprecated Use + {@link org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper} instead.]]> + + + + + + + + + + + + total.order.partitioner.natural.order is not false, a trie + of the first total.order.partitioner.max.trie.depth(2) + 1 bytes + will be built. Otherwise, keys will be located using a binary search of + the partition keyset using the {@link org.apache.hadoop.io.RawComparator} + defined for this job. The input file must be sorted with the same + comparator and contain {@link + org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys.]]> + + + + + + + + + + + + R reduces, there are R-1 + keys in the SequenceFile.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + generateKeyValPairs(Object key, Object value); public void + configure(JobConfjob); } + + The package also provides a base class, ValueAggregatorBaseDescriptor, + implementing the above interface. The user can extend the base class and + implement generateKeyValPairs accordingly. + + The primary work of generateKeyValPairs is to emit one or more key/value + pairs based on the input key/value pair. The key in an output key/value pair + encode two pieces of information: aggregation type and aggregation id. The + value will be aggregated onto the aggregation id according the aggregation + type. + + This class offers a function to generate a map/reduce job using Aggregate + framework. The function takes the following parameters: input directory spec + input format (text or sequence file) output directory a file specifying the + user plugin class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The job can be configured using the static methods in this class, + {@link DBInputFormat}, and {@link DBOutputFormat}. +

    + Alternatively, the properties can be set in the configuration with proper + values. + + @see DBConfiguration#configureDB(JobConf, String, String, String, String) + @see DBInputFormat#setInput(JobConf, Class, String, String) + @see DBInputFormat#setInput(JobConf, Class, String, String, String, String...) + @see DBOutputFormat#setOutput(JobConf, String, String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 20070101 AND length > 0)' + @param orderBy the fieldNames in the orderBy clause. + @param fieldNames The field names in the table + @see #setInput(JobConf, Class, String, String)]]> + + + + + + + + + + + + + + DBInputFormat emits LongWritables containing the record number as + key and DBWritables as value. + + The SQL query, and input class can be using one of the two + setInput methods.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {@link DBOutputFormat} accepts <key,value> pairs, where + key has a type extending DBWritable. Returned {@link RecordWriter} + writes only the key to the database with a batch SQL query.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DBWritable. DBWritable, is similar to {@link Writable} + except that the {@link #write(PreparedStatement)} method takes a + {@link PreparedStatement}, and {@link #readFields(ResultSet)} + takes a {@link ResultSet}. +

    + Implementations are responsible for writing the fields of the object + to PreparedStatement, and reading the fields of the object from the + ResultSet. + +

    Example:

    + If we have the following table in the database : +
    + CREATE TABLE MyTable (
    +   counter        INTEGER NOT NULL,
    +   timestamp      BIGINT  NOT NULL,
    + );
    + 
    + then we can read/write the tuples from/to the table with : +

    + public class MyWritable implements Writable, DBWritable {
    +   // Some data     
    +   private int counter;
    +   private long timestamp;
    +       
    +   //Writable#write() implementation
    +   public void write(DataOutput out) throws IOException {
    +     out.writeInt(counter);
    +     out.writeLong(timestamp);
    +   }
    +       
    +   //Writable#readFields() implementation
    +   public void readFields(DataInput in) throws IOException {
    +     counter = in.readInt();
    +     timestamp = in.readLong();
    +   }
    +       
    +   public void write(PreparedStatement statement) throws SQLException {
    +     statement.setInt(1, counter);
    +     statement.setLong(2, timestamp);
    +   }
    +       
    +   public void readFields(ResultSet resultSet) throws SQLException {
    +     counter = resultSet.getInt(1);
    +     timestamp = resultSet.getLong(2);
    +   } 
    + }
    + 

    ]]> +
    + + +
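Tying the DB classes above together, a hedged driver sketch for the old-API org.apache.hadoop.mapred.lib.db package; the JDBC driver, URL, credentials and table/column names are placeholders, and MyWritable refers to the DBWritable shown in the example above:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapred.lib.db.DBOutputFormat;

public class DbJobSetup {
  static void configure(JobConf conf) {
    // Placeholder connection settings.
    DBConfiguration.configureDB(conf,
        "com.mysql.jdbc.Driver", "jdbc:mysql://localhost/mydb", "user", "password");

    // Read MyTable rows into MyWritable (the DBWritable defined above),
    // ordered by the counter column; no WHERE conditions.
    conf.setInputFormat(DBInputFormat.class);
    DBInputFormat.setInput(conf, MyWritable.class, "MyTable", null, "counter",
        "counter", "timestamp");

    // Write job output back to a (placeholder) result table.
    conf.setOutputFormat(DBOutputFormat.class);
    DBOutputFormat.setOutput(conf, "MyResultTable", "counter", "timestamp");
  }
}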
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter is named by + an {@link Enum} and has a long for the value.

    + +

    Counters are bunched into Groups, each comprising of + counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

    Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For e.g. a split could + be <input-file-path, start, offset> tuple. The InputFormat + also creates the {@link RecordReader} to read the {@link InputSplit}. + + @param context job configuration. + @return an array of {@link InputSplit}s for the job.]]> + + + + + + + + + + + + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
1. + Validate the input-specification of the job. +
2. + Split-up the input file(s) into logical {@link InputSplit}s, each of + which is then assigned to an individual {@link Mapper}. +
3. + Provide the {@link RecordReader} implementation to be used to glean + input records from the logical InputSplit for processing by + the {@link Mapper}. +
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapred.min.split.size.

    + +

Clearly, logical splits based on input size are insufficient for many + applications since record boundaries must be respected. In such cases, the + application also has to implement a {@link RecordReader}, on which lies the + responsibility to respect record boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see FileInputFormat]]> + + + + + + + + + + + + InputSplit represents the data to be processed by an + individual {@link Mapper}. + +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + InputFormat to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + OutputFormat to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + Mapper to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + + + + + + + + + + + + + + + + + + + Reducer to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + Partitioner to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker is lost]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1. + @return the number of reduce tasks for this job.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID + @see org.apache.hadoop.mapred.JobTracker#getNewJobId() + @see org.apache.hadoop.mapred.JobTracker#getStartTime()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + the key input type to the Mapper + @param the value input type to the Mapper + @param the key output type from the Mapper + @param the value output type from the Mapper]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Maps are the individual tasks which transform input records into a + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link Configuration} for + the job via the {@link JobContext#getConfiguration()}. + +

    The framework first calls + {@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by + {@link #map(Object, Object, Context)} + for each key/value pair in the InputSplit. Finally + {@link #cleanup(Context)} is called.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the sorting and grouping by + specifying two key {@link RawComparator} classes.

    + +

    The Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link Job#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the Configuration.

    + +

    If the job has zero + reduces then the output of the Mapper is directly written + to the {@link OutputFormat} without sorting by keys.

    + +

    Example:

    +

    + public class TokenCounterMapper 
+     extends Mapper<Object, Text, Text, IntWritable>{
    +    
    +   private final static IntWritable one = new IntWritable(1);
    +   private Text word = new Text();
    +   
+   public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    +     StringTokenizer itr = new StringTokenizer(value.toString());
    +     while (itr.hasMoreTokens()) {
    +       word.set(itr.nextToken());
+       context.write(word, one);
    +     }
    +   }
    + }
    + 

    + +

    Applications may override the {@link #run(Context)} method to exert + greater control on map processing e.g. multi-threaded Mappers + etc.

    + + @see InputFormat + @see JobContext + @see Partitioner + @see Reducer]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OutputCommitter describes the commit of task output for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputCommitter of + the job to:

    +

      +
1. + Setup the job during initialization. For example, create the temporary + output directory for the job during the initialization of the job. +
2. + Cleanup the job after the job completion. For example, remove the + temporary output directory after the job completion. +
3. + Setup the task temporary output. +
4. + Check whether a task needs a commit. This is to avoid the commit + procedure if a task does not need commit. +
5. + Commit of the task output. +
6. + Discard the task commit. +
    + + @see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter + @see JobContext + @see TaskAttemptContext]]> +
    +
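The list above corresponds to the abstract hooks of the new-API OutputCommitter. As a rough sketch only (not taken from these sources), a committer for output that is written directly to its final location could leave every hook empty and report that no per-task commit is needed:

import java.io.IOException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class NoOpOutputCommitter extends OutputCommitter {
  @Override public void setupJob(JobContext jobContext) throws IOException { }
  @Override public void setupTask(TaskAttemptContext taskContext) throws IOException { }
  // Nothing is staged in a temporary location, so there is nothing to promote.
  @Override public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
    return false;
  }
  @Override public void commitTask(TaskAttemptContext taskContext) throws IOException { }
  @Override public void abortTask(TaskAttemptContext taskContext) throws IOException { }
}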
    + + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when it is + a job is submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + @param context information about the job + @throws IOException when output should not be attempted]]> +
    +
    + + + + + + + + + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
1. + Validate the output-specification of the job. For e.g. check that the + output directory doesn't already exist. +
2. + Provide the {@link RecordWriter} implementation to be used to write out + the output files of the job. Output files are stored in a + {@link FileSystem}. +
    + + @see RecordWriter]]> +
    +
    + + + + + + + + + + + Typically a hash function on a all or a subset of the key.

+ + @param key the key to be partitioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + + @see Reducer]]> +
    +
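A brief sketch of a custom new-API Partitioner in the spirit of the description above; Text keys and IntWritable values are assumed purely for illustration:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class WordHashPartitioner extends Partitioner<Text, IntWritable> {
  @Override
  public int getPartition(Text key, IntWritable value, int numPartitions) {
    // Mask the sign bit so the modulo result is always a valid partition index.
    return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
  }
}

A job would select it with job.setPartitionerClass(WordHashPartitioner.class).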
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @param ]]> + + + + + + + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param context the context of the task + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the input keys + @param the class of the input values + @param the class of the output keys + @param the class of the output values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Reducer implementations + can access the {@link Configuration} for the job via the + {@link JobContext#getConfiguration()} method.

    + +

    Reducer has 3 primary phases:

    +
      +
    1. + +

      Shuffle

      + +

      The Reducer copies the sorted output from each + {@link Mapper} using HTTP across the network.

      +
2. +

      Sort

      + +

      The framework merge sorts Reducer inputs by + keys + (since different Mappers may have output the same key).

      + +

      The shuffle and sort phases occur simultaneously i.e. while outputs are + being fetched they are merged.

      + +
      SecondarySort
      + +

To achieve a secondary sort on the values returned by the value + iterator, the application should extend the key with the secondary + key and define a grouping comparator. The keys will be sorted using the + entire key, but will be grouped using the grouping comparator to decide + which keys and values are sent in the same call to reduce. The grouping + comparator is specified via + {@link Job#setGroupingComparatorClass(Class)}. The sort order is + controlled by + {@link Job#setSortComparatorClass(Class)}.

      + + + For example, say that you want to find duplicate web pages and tag them + all with the url of the "best" known example. You would set up the job + like: +
        +
• Map Input Key: url
• Map Input Value: document
• Map Output Key: document checksum, url pagerank
• Map Output Value: url
• Partitioner: by checksum
• OutputKeyComparator: by checksum and then decreasing pagerank
• OutputValueGroupingComparator: by checksum
      +
3. +

      Reduce

      + +

      In this phase the + {@link #reduce(Object, Iterable, Context)} + method is called for each <key, (collection of values)> in + the sorted inputs.

      +

      The output of the reduce task is typically written to a + {@link RecordWriter} via + {@link Context#write(Object, Object)}.

      +
    + +

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

+ public class IntSumReducer<Key> extends Reducer<Key,IntWritable,Key,IntWritable> {
    +   private IntWritable result = new IntWritable();
    + 
+   public void reduce(Key key, Iterable<IntWritable> values, 
+                      Context context) throws IOException, InterruptedException {
    +     int sum = 0;
    +     for (IntWritable val : values) {
    +       sum += val.get();
    +     }
    +     result.set(sum);
+     context.write(key, result);
    +   }
    + }
    + 

    + + @see Mapper + @see Partitioner]]> +
    +
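Putting a Mapper and Reducer of the kind shown above into a runnable job, a hedged driver sketch: it assumes the library TokenCounterMapper and IntSumReducer classes (which mirror the examples) and Hadoop 2.x's Job.getInstance; the input/output paths are placeholders taken from the command line:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class WordCountDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCountDriver.class);

    // Library classes that mirror the TokenCounterMapper/IntSumReducer examples above.
    job.setMapperClass(TokenCounterMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}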
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the input key type for the task + @param the input value type for the task + @param the output key type for the task + @param the output value type for the task]]> + + + + + + + + + + + + + + + + + + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param context the job context + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobContext)}. + Subclasses of FileInputFormat can also override the + {@link #isSplitable(JobContext, Path)} method to ensure input-files are + not split-up and are processed as a whole by {@link Mapper}s.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the map's input key type + @param the map's input value type + @param the map's output key type + @param the map's output value type + @param job the job + @return the mapper class to run]]> + + + + + + + the map input key type + @param the map input value type + @param the map output key type + @param the map output value type + @param job the job to modify + @param cls the class to use as the mapper]]> + + + + + + + + + + + + + It can be used instead of the default implementation, + @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + bound in order to improve throughput. +

    + Mapper implementations using this MapRunnable must be thread-safe. +

+ The Map-Reduce job has to be configured with the mapper to use via + {@link #setMapperClass(Configuration, Class)} and + the number of threads the thread-pool can use with the + {@link #getNumberOfThreads(Configuration)} method. The default + value is 10 threads. +
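A hedged sketch of that configuration, assuming the new-API MultithreadedMapper helpers that take a Job (as in Hadoop 2.x) and reusing TokenCounterMapper as a thread-safe delegate:

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;

public class MultithreadedSetup {
  // Sketch only: wrap a delegate mapper in MultithreadedMapper and size its thread pool.
  static void configure(Job job) {
    job.setMapperClass(MultithreadedMapper.class);
    MultithreadedMapper.setMapperClass(job, TokenCounterMapper.class);
    MultithreadedMapper.setNumberOfThreads(job, 8);
  }
}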

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapred.output.dir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapred.output.dir}/_temporary/_${taskid} (only) + are promoted to ${mapred.output.dir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

    The application-writer can take advantage of this by creating any + side-files required in a work directory during execution + of his task i.e. via + {@link #getWorkOutputPath(TaskInputOutputContext)}, and + the framework will move them out similarly - thus she doesn't have to pick + unique paths per task-attempt.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
    + + + + + + + + + The path can be used to create custom files from within the map and + reduce tasks. The path name will be unique for each task. The path parent + will be the job output directory.

+ +

    This method uses the {@link #getUniqueFile} method to make the file name + unique for the task.

+ + @param context the context for the task. + @param name the name for the file. + @param extension the extension for the file. + @return a unique path across all tasks of the job.]]> +
    +
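For instance, a reduce task could create one such side file through the helper described above; a rough sketch, with the file name and contents purely illustrative:

import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SideFileReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {
    // A per-attempt file under the task's temporary work directory; the
    // framework promotes it to the job output directory on successful commit.
    Path sideFile = FileOutputFormat.getPathForWorkFile(context, "summary", ".txt");
    FileSystem fs = sideFile.getFileSystem(context.getConfiguration());
    FSDataOutputStream out = fs.create(sideFile, false);
    try {
      out.writeBytes("done\n");
    } finally {
      out.close();
    }
  }
}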
    + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This tool supports archiving and anaylzing (sort/grep) of log-files. + It takes as input + a) Input uri which will serve uris of the logs to be archived. + b) Output directory (not mandatory). + b) Directory on dfs to archive the logs. + c) The sort/grep patterns for analyzing the files and separator for boundaries. + Usage: + Logalyzer -archive -archiveDir -analysis -logs -grep -sort -separator +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/aarch64/share/hadoop/common/lib/activation-1.1.jar b/aarch64/share/hadoop/common/lib/activation-1.1.jar new file mode 100644 index 0000000..53f82a1 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/activation-1.1.jar differ diff --git a/aarch64/share/hadoop/common/lib/asm-3.2.jar b/aarch64/share/hadoop/common/lib/asm-3.2.jar new file mode 100644 index 0000000..ca9f8d2 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/asm-3.2.jar differ diff --git a/aarch64/share/hadoop/common/lib/avro-1.7.4.jar b/aarch64/share/hadoop/common/lib/avro-1.7.4.jar new file mode 100644 index 0000000..69dd87d Binary files /dev/null and b/aarch64/share/hadoop/common/lib/avro-1.7.4.jar differ diff --git a/aarch64/share/hadoop/common/lib/commons-beanutils-1.7.0.jar b/aarch64/share/hadoop/common/lib/commons-beanutils-1.7.0.jar new file mode 100644 index 0000000..b1b89c9 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-beanutils-1.7.0.jar differ diff --git a/aarch64/share/hadoop/common/lib/commons-beanutils-core-1.8.0.jar b/aarch64/share/hadoop/common/lib/commons-beanutils-core-1.8.0.jar new file mode 100644 index 0000000..87c15f4 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-beanutils-core-1.8.0.jar differ diff --git a/aarch64/share/hadoop/common/lib/commons-cli-1.2.jar b/aarch64/share/hadoop/common/lib/commons-cli-1.2.jar new file mode 100644 index 0000000..ce4b9ff Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-cli-1.2.jar differ diff --git a/aarch64/share/hadoop/common/lib/commons-codec-1.4.jar b/aarch64/share/hadoop/common/lib/commons-codec-1.4.jar new file mode 100644 index 0000000..458d432 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-codec-1.4.jar differ diff --git a/aarch64/share/hadoop/common/lib/commons-collections-3.2.1.jar b/aarch64/share/hadoop/common/lib/commons-collections-3.2.1.jar new file mode 100644 index 0000000..c35fa1f Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-collections-3.2.1.jar differ diff --git a/aarch64/share/hadoop/common/lib/commons-compress-1.4.1.jar b/aarch64/share/hadoop/common/lib/commons-compress-1.4.1.jar new file mode 100644 index 0000000..b58761e Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-compress-1.4.1.jar differ diff --git a/aarch64/share/hadoop/common/lib/commons-configuration-1.6.jar b/aarch64/share/hadoop/common/lib/commons-configuration-1.6.jar new file mode 100644 index 0000000..2d4689a Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-configuration-1.6.jar differ diff --git a/aarch64/share/hadoop/common/lib/commons-digester-1.8.jar b/aarch64/share/hadoop/common/lib/commons-digester-1.8.jar new file mode 100644 index 0000000..1110f0a Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-digester-1.8.jar differ diff --git a/aarch64/share/hadoop/common/lib/commons-el-1.0.jar b/aarch64/share/hadoop/common/lib/commons-el-1.0.jar new file mode 100644 index 0000000..608ed79 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-el-1.0.jar differ diff --git a/aarch64/share/hadoop/common/lib/commons-httpclient-3.1.jar b/aarch64/share/hadoop/common/lib/commons-httpclient-3.1.jar new file mode 100644 index 0000000..7c59774 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-httpclient-3.1.jar differ diff --git 
a/aarch64/share/hadoop/common/lib/commons-io-2.1.jar b/aarch64/share/hadoop/common/lib/commons-io-2.1.jar new file mode 100644 index 0000000..b5c7d69 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-io-2.1.jar differ diff --git a/aarch64/share/hadoop/common/lib/commons-lang-2.5.jar b/aarch64/share/hadoop/common/lib/commons-lang-2.5.jar new file mode 100644 index 0000000..ae491da Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-lang-2.5.jar differ diff --git a/aarch64/share/hadoop/common/lib/commons-logging-1.1.1.jar b/aarch64/share/hadoop/common/lib/commons-logging-1.1.1.jar new file mode 100644 index 0000000..1deef14 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-logging-1.1.1.jar differ diff --git a/aarch64/share/hadoop/common/lib/commons-math-2.1.jar b/aarch64/share/hadoop/common/lib/commons-math-2.1.jar new file mode 100644 index 0000000..43b4b36 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-math-2.1.jar differ diff --git a/aarch64/share/hadoop/common/lib/commons-net-3.1.jar b/aarch64/share/hadoop/common/lib/commons-net-3.1.jar new file mode 100644 index 0000000..b75f1a5 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/commons-net-3.1.jar differ diff --git a/aarch64/share/hadoop/common/lib/guava-11.0.2.jar b/aarch64/share/hadoop/common/lib/guava-11.0.2.jar new file mode 100644 index 0000000..c8c8d5d Binary files /dev/null and b/aarch64/share/hadoop/common/lib/guava-11.0.2.jar differ diff --git a/aarch64/share/hadoop/common/lib/hadoop-annotations-2.2.0.jar b/aarch64/share/hadoop/common/lib/hadoop-annotations-2.2.0.jar new file mode 100644 index 0000000..c2d4dc1 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/hadoop-annotations-2.2.0.jar differ diff --git a/aarch64/share/hadoop/common/lib/hadoop-auth-2.2.0.jar b/aarch64/share/hadoop/common/lib/hadoop-auth-2.2.0.jar new file mode 100644 index 0000000..0787af1 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/hadoop-auth-2.2.0.jar differ diff --git a/aarch64/share/hadoop/common/lib/jackson-core-asl-1.8.8.jar b/aarch64/share/hadoop/common/lib/jackson-core-asl-1.8.8.jar new file mode 100644 index 0000000..05f3353 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jackson-core-asl-1.8.8.jar differ diff --git a/aarch64/share/hadoop/common/lib/jackson-jaxrs-1.8.8.jar b/aarch64/share/hadoop/common/lib/jackson-jaxrs-1.8.8.jar new file mode 100644 index 0000000..21b31c2 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jackson-jaxrs-1.8.8.jar differ diff --git a/aarch64/share/hadoop/common/lib/jackson-mapper-asl-1.8.8.jar b/aarch64/share/hadoop/common/lib/jackson-mapper-asl-1.8.8.jar new file mode 100644 index 0000000..7c7cd21 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jackson-mapper-asl-1.8.8.jar differ diff --git a/aarch64/share/hadoop/common/lib/jackson-xc-1.8.8.jar b/aarch64/share/hadoop/common/lib/jackson-xc-1.8.8.jar new file mode 100644 index 0000000..ebfbf41 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jackson-xc-1.8.8.jar differ diff --git a/aarch64/share/hadoop/common/lib/jasper-compiler-5.5.23.jar b/aarch64/share/hadoop/common/lib/jasper-compiler-5.5.23.jar new file mode 100644 index 0000000..170efa0 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jasper-compiler-5.5.23.jar differ diff --git a/aarch64/share/hadoop/common/lib/jasper-runtime-5.5.23.jar b/aarch64/share/hadoop/common/lib/jasper-runtime-5.5.23.jar new file mode 100644 index 
0000000..a3208c9 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jasper-runtime-5.5.23.jar differ diff --git a/aarch64/share/hadoop/common/lib/jaxb-api-2.2.2.jar b/aarch64/share/hadoop/common/lib/jaxb-api-2.2.2.jar new file mode 100644 index 0000000..31e5fa0 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jaxb-api-2.2.2.jar differ diff --git a/aarch64/share/hadoop/common/lib/jaxb-impl-2.2.3-1.jar b/aarch64/share/hadoop/common/lib/jaxb-impl-2.2.3-1.jar new file mode 100644 index 0000000..eeaf660 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jaxb-impl-2.2.3-1.jar differ diff --git a/aarch64/share/hadoop/common/lib/jersey-core-1.9.jar b/aarch64/share/hadoop/common/lib/jersey-core-1.9.jar new file mode 100644 index 0000000..548dd88 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jersey-core-1.9.jar differ diff --git a/aarch64/share/hadoop/common/lib/jersey-json-1.9.jar b/aarch64/share/hadoop/common/lib/jersey-json-1.9.jar new file mode 100644 index 0000000..b1a4ce5 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jersey-json-1.9.jar differ diff --git a/aarch64/share/hadoop/common/lib/jersey-server-1.9.jar b/aarch64/share/hadoop/common/lib/jersey-server-1.9.jar new file mode 100644 index 0000000..ae0117c Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jersey-server-1.9.jar differ diff --git a/aarch64/share/hadoop/common/lib/jets3t-0.6.1.jar b/aarch64/share/hadoop/common/lib/jets3t-0.6.1.jar new file mode 100644 index 0000000..e4048dd Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jets3t-0.6.1.jar differ diff --git a/aarch64/share/hadoop/common/lib/jettison-1.1.jar b/aarch64/share/hadoop/common/lib/jettison-1.1.jar new file mode 100644 index 0000000..e4e9c8c Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jettison-1.1.jar differ diff --git a/aarch64/share/hadoop/common/lib/jetty-6.1.26.jar b/aarch64/share/hadoop/common/lib/jetty-6.1.26.jar new file mode 100644 index 0000000..2cbe07a Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jetty-6.1.26.jar differ diff --git a/aarch64/share/hadoop/common/lib/jetty-util-6.1.26.jar b/aarch64/share/hadoop/common/lib/jetty-util-6.1.26.jar new file mode 100644 index 0000000..cd23752 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jetty-util-6.1.26.jar differ diff --git a/aarch64/share/hadoop/common/lib/jsch-0.1.42.jar b/aarch64/share/hadoop/common/lib/jsch-0.1.42.jar new file mode 100644 index 0000000..c65eff0 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jsch-0.1.42.jar differ diff --git a/aarch64/share/hadoop/common/lib/jsp-api-2.1.jar b/aarch64/share/hadoop/common/lib/jsp-api-2.1.jar new file mode 100644 index 0000000..c0195af Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jsp-api-2.1.jar differ diff --git a/aarch64/share/hadoop/common/lib/jsr305-1.3.9.jar b/aarch64/share/hadoop/common/lib/jsr305-1.3.9.jar new file mode 100644 index 0000000..a9afc66 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/jsr305-1.3.9.jar differ diff --git a/aarch64/share/hadoop/common/lib/junit-4.8.2.jar b/aarch64/share/hadoop/common/lib/junit-4.8.2.jar new file mode 100644 index 0000000..5b4bb84 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/junit-4.8.2.jar differ diff --git a/aarch64/share/hadoop/common/lib/log4j-1.2.17.jar b/aarch64/share/hadoop/common/lib/log4j-1.2.17.jar new file mode 100644 index 0000000..1d425cf Binary files /dev/null and b/aarch64/share/hadoop/common/lib/log4j-1.2.17.jar 
differ diff --git a/aarch64/share/hadoop/common/lib/mockito-all-1.8.5.jar b/aarch64/share/hadoop/common/lib/mockito-all-1.8.5.jar new file mode 100644 index 0000000..4b0395e Binary files /dev/null and b/aarch64/share/hadoop/common/lib/mockito-all-1.8.5.jar differ diff --git a/aarch64/share/hadoop/common/lib/netty-3.6.2.Final.jar b/aarch64/share/hadoop/common/lib/netty-3.6.2.Final.jar new file mode 100644 index 0000000..a421e28 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/netty-3.6.2.Final.jar differ diff --git a/aarch64/share/hadoop/common/lib/paranamer-2.3.jar b/aarch64/share/hadoop/common/lib/paranamer-2.3.jar new file mode 100644 index 0000000..ad12ae9 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/paranamer-2.3.jar differ diff --git a/aarch64/share/hadoop/common/lib/protobuf-java-2.5.0.jar b/aarch64/share/hadoop/common/lib/protobuf-java-2.5.0.jar new file mode 100644 index 0000000..4c4e686 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/protobuf-java-2.5.0.jar differ diff --git a/aarch64/share/hadoop/common/lib/servlet-api-2.5.jar b/aarch64/share/hadoop/common/lib/servlet-api-2.5.jar new file mode 100644 index 0000000..fb52493 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/servlet-api-2.5.jar differ diff --git a/aarch64/share/hadoop/common/lib/slf4j-api-1.7.5.jar b/aarch64/share/hadoop/common/lib/slf4j-api-1.7.5.jar new file mode 100644 index 0000000..8f004d3 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/slf4j-api-1.7.5.jar differ diff --git a/aarch64/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar b/aarch64/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar new file mode 100644 index 0000000..f5298b5 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar differ diff --git a/aarch64/share/hadoop/common/lib/snappy-java-1.0.4.1.jar b/aarch64/share/hadoop/common/lib/snappy-java-1.0.4.1.jar new file mode 100644 index 0000000..8198919 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/snappy-java-1.0.4.1.jar differ diff --git a/aarch64/share/hadoop/common/lib/stax-api-1.0.1.jar b/aarch64/share/hadoop/common/lib/stax-api-1.0.1.jar new file mode 100644 index 0000000..d9a1665 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/stax-api-1.0.1.jar differ diff --git a/aarch64/share/hadoop/common/lib/xmlenc-0.52.jar b/aarch64/share/hadoop/common/lib/xmlenc-0.52.jar new file mode 100644 index 0000000..ec568b4 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/xmlenc-0.52.jar differ diff --git a/aarch64/share/hadoop/common/lib/xz-1.0.jar b/aarch64/share/hadoop/common/lib/xz-1.0.jar new file mode 100644 index 0000000..a848f16 Binary files /dev/null and b/aarch64/share/hadoop/common/lib/xz-1.0.jar differ diff --git a/aarch64/share/hadoop/common/lib/zookeeper-3.4.5.jar b/aarch64/share/hadoop/common/lib/zookeeper-3.4.5.jar new file mode 100644 index 0000000..a7966bb Binary files /dev/null and b/aarch64/share/hadoop/common/lib/zookeeper-3.4.5.jar differ diff --git a/aarch64/share/hadoop/common/sources/hadoop-common-2.2.0-sources.jar b/aarch64/share/hadoop/common/sources/hadoop-common-2.2.0-sources.jar new file mode 100644 index 0000000..8214292 Binary files /dev/null and b/aarch64/share/hadoop/common/sources/hadoop-common-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/common/sources/hadoop-common-2.2.0-test-sources.jar b/aarch64/share/hadoop/common/sources/hadoop-common-2.2.0-test-sources.jar new file mode 100644 index 0000000..0a236b9 Binary files /dev/null and 
b/aarch64/share/hadoop/common/sources/hadoop-common-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/common/templates/core-site.xml b/aarch64/share/hadoop/common/templates/core-site.xml new file mode 100644 index 0000000..d2ddf89 --- /dev/null +++ b/aarch64/share/hadoop/common/templates/core-site.xml @@ -0,0 +1,20 @@ + + + + + + + + diff --git a/aarch64/share/hadoop/hdfs/hadoop-hdfs-2.2.0-tests.jar b/aarch64/share/hadoop/hdfs/hadoop-hdfs-2.2.0-tests.jar new file mode 100644 index 0000000..2db90ec Binary files /dev/null and b/aarch64/share/hadoop/hdfs/hadoop-hdfs-2.2.0-tests.jar differ diff --git a/aarch64/share/hadoop/hdfs/hadoop-hdfs-2.2.0.jar b/aarch64/share/hadoop/hdfs/hadoop-hdfs-2.2.0.jar new file mode 100644 index 0000000..95c8b8c Binary files /dev/null and b/aarch64/share/hadoop/hdfs/hadoop-hdfs-2.2.0.jar differ diff --git a/aarch64/share/hadoop/hdfs/hadoop-hdfs-nfs-2.2.0.jar b/aarch64/share/hadoop/hdfs/hadoop-hdfs-nfs-2.2.0.jar new file mode 100644 index 0000000..e0338e1 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/hadoop-hdfs-nfs-2.2.0.jar differ diff --git a/aarch64/share/hadoop/hdfs/jdiff/hadoop-hdfs_0.20.0.xml b/aarch64/share/hadoop/hdfs/jdiff/hadoop-hdfs_0.20.0.xml new file mode 100644 index 0000000..823c3d8 --- /dev/null +++ b/aarch64/share/hadoop/hdfs/jdiff/hadoop-hdfs_0.20.0.xml @@ -0,0 +1,10389 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileNotFoundException is file does not exist.]]> + + + + + + + + + FileNotFoundException is file does not exist.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. This is loosely modelled after +Google's GFS.

    + +

    The most important difference is that unlike GFS, Hadoop DFS files +have strictly one writer at any one time. Bytes are always appended +to the end of the writer's stream. There is no notion of "record appends" +or "mutations" that are then checked or reordered. Writers simply emit +a byte stream. That byte stream is guaranteed to be stored in the +order written.

    ]]> +
    +
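The single-writer, append-only contract described above is what a client sees through the ordinary FileSystem API. A minimal sketch, assuming a reachable HDFS whose client configuration is on the classpath; the path and payload below are placeholders.

// Minimal sketch of the single-writer, append-only model described above.
// Uses the public org.apache.hadoop.fs API; path and payload are placeholders.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SingleWriterExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();      // picks up core-site.xml / hdfs-site.xml
    FileSystem fs = FileSystem.get(conf);
    Path file = new Path("/tmp/example.bytes");    // placeholder path

    // Only one writer may hold the file open; bytes go to the end of the stream.
    FSDataOutputStream out = fs.create(file, true /* overwrite */);
    out.write("stored in the order written\n".getBytes("UTF-8"));
    out.close();                                   // close completes the file for readers
  }
}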
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return {@link LocatedBlocks} which contains + file length, blocks and their locations. + DataNode locations for each block are sorted by + the distance to the client's address. +

    + The client will then have to contact + one of the indicated DataNodes to obtain the actual data. + + @param src file name + @param offset range start offset + @param length range length + @return file length and array of blocks with their locations + @throws IOException]]> + + + + + + + + + + + + + This will create an empty file specified by the source path. + The path should reflect a full path originated at the root. + The name-node does not have a notion of "current" directory for a client. +

    + Once created, the file is visible and available for read to other clients. + Although, other clients cannot {@link #delete(String)}, re-create or + {@link #rename(String, String)} it until the file is completed + or explicitly as a result of lease expiration. +

    + Blocks have a maximum size. Clients that intend to + create multi-block files must also use {@link #addBlock(String, String)}. + + @param src path of the file being created. + @param masked masked permission. + @param clientName name of the current client. + @param overwrite indicates whether the file should be + overwritten if it already exists. + @param replication block replication factor. + @param blockSize maximum block size. + + @throws AccessControlException if permission to create file is + denied by the system. As usually on the client side the exception will + be wrapped into {@link org.apache.hadoop.ipc.RemoteException}. + @throws QuotaExceededException if the file creation violates + any quota restriction + @throws IOException if other errors occur.]]> + + + + + + + + + + + + + + + + + The NameNode sets replication to the new value and returns. + The actual block replication is not expected to be performed during + this method call. The blocks will be populated or removed in the + background as the result of the routine block maintenance procedures. + + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Any blocks belonging to the deleted files will be garbage-collected. + + @param src existing name. + @return true only if the existing file or directory was actually removed + from the file system.]]> + + + + + + + + + same as delete but provides a way to avoid accidentally + deleting non empty directories programmatically. + @param src existing name + @param recursive if true deletes a non empty directory recursively, + else throws an exception. + @return true only if the existing file or directory was actually removed + from the file system.]]> + + + + + + + + + + + + + + + + + + + + + + + So, the NameNode will revoke the locks and live file-creates + for clients that it thinks have died. A client tells the + NameNode that it is still alive by periodically calling + renewLease(). If a certain amount of time passes since + the last call to renewLease(), the NameNode assumes the + client has died.]]> + + + + + + +

• [0] contains the total storage capacity of the system, in bytes.
• [1] contains the total used space of the system, in bytes.
• [2] contains the available storage of the system, in bytes.
• [3] contains number of under replicated blocks in the system.
• [4] contains number of blocks with a corrupt replica.
• [5] contains number of blocks without any good replicas left.
  • + + Use public constants like {@link #GET_STATS_CAPACITY_IDX} in place of + actual numbers to index into the array.]]> + + + + + + + + + + + + + + + + + + + + + + Safe mode is a name node state when it +
1. does not accept changes to name space (read-only), and
2. does not replicate or delete blocks.
    + +

    + Safe mode is entered automatically at name node startup. + Safe mode can also be entered manually using + {@link #setSafeMode(FSConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_GET)}. +

    + At startup the name node accepts data node reports collecting + information about block locations. + In order to leave safe mode it needs to collect a configurable + percentage called threshold of blocks, which satisfy the minimal + replication condition. + The minimal replication condition is that each block must have at least + dfs.replication.min replicas. + When the threshold is reached the name node extends safe mode + for a configurable amount of time + to let the remaining data nodes to check in before it + will start replicating missing blocks. + Then the name node leaves safe mode. +

    + If safe mode is turned on manually using + {@link #setSafeMode(FSConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_ENTER)} + then the name node stays in safe mode until it is manually turned off + using {@link #setSafeMode(FSConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_LEAVE)}. + Current state of the name node can be verified using + {@link #setSafeMode(FSConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_GET)} +

    Configuration parameters:

    + dfs.safemode.threshold.pct is the threshold parameter.
    + dfs.safemode.extension is the safe mode extension parameter.
    + dfs.replication.min is the minimal replication parameter. + +
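The three parameters listed above are ordinary configuration keys. A minimal sketch of setting them programmatically, assuming only the stock Configuration class; the values are illustrative, not recommendations.

// Hedged sketch: setting the safe-mode parameters named above on a Configuration.
// Property names come from this javadoc; the values are examples only.
import org.apache.hadoop.conf.Configuration;

public class SafeModeConfig {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.setFloat("dfs.safemode.threshold.pct", 0.999f); // fraction of blocks required
    conf.setLong("dfs.safemode.extension", 30000L);      // extension, in milliseconds
    conf.setInt("dfs.replication.min", 1);               // minimal replication
    System.out.println(conf.get("dfs.safemode.threshold.pct"));
  }
}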

    Special cases:

    + The name node does not enter safe mode at startup if the threshold is + set to 0 or if the name space is empty.
    + If the threshold is set to 1 then all blocks need to have at least + minimal replication.
    + If the threshold value is greater than 1 then the name node will not be + able to turn off safe mode automatically.
    + Safe mode can always be turned off manually. + + @param action
• 0 leave safe mode;
• 1 enter safe mode;
• 2 get safe mode state.
+ @return
• 0 if the safe mode is OFF or
• 1 if the safe mode is ON.
    + @throws IOException]]> +
    +
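For completeness, a minimal sketch of querying safe mode from a client, assuming the 0.20-era class names used in this javadoc (the SafeModeAction enum later moved to HdfsConstants) and a fs.defaultFS that points at HDFS.

// Hedged sketch: querying safe mode through the API quoted above.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.FSConstants;

public class SafeModeCheck {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Cast assumes the default filesystem is HDFS.
    DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(conf);

    // SAFEMODE_GET only reports the state; ENTER/LEAVE change it.
    boolean inSafeMode = dfs.setSafeMode(FSConstants.SafeModeAction.SAFEMODE_GET);
    System.out.println("Safe mode is " + (inSafeMode ? "ON" : "OFF"));
  }
}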
    + + + + + Saves current namespace into storage directories and reset edits log. + Requires superuser privilege and safe mode. + + @throws AccessControlException if the superuser privilege is violated. + @throws IOException if image creation failed.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + The quota can have three types of values : (1) 0 or more will set + the quota to that value, (2) {@link FSConstants#QUOTA_DONT_SET} implies + the quota will not be changed, and (3) {@link FSConstants#QUOTA_RESET} + implies the quota will be reset. Any other value is a runtime error. + + @throws FileNotFoundException if the path is a file or + does not exist + @throws QuotaExceededException if the directory size + is greater than the given quota]]> +
    +
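A minimal sketch of the quota semantics just described, assuming DistributedFileSystem#setQuota and the FSConstants sentinels named above; the directory and the limits are placeholders.

// Hedged sketch of the quota values described above, via DistributedFileSystem.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.FSConstants;

public class QuotaExample {
  public static void main(String[] args) throws Exception {
    DistributedFileSystem dfs =
        (DistributedFileSystem) FileSystem.get(new Configuration());
    Path dir = new Path("/user/project");                    // placeholder directory

    // Cap the namespace at 10,000 names; leave the diskspace quota unchanged.
    dfs.setQuota(dir, 10000L, FSConstants.QUOTA_DONT_SET);

    // Later, reset (remove) the namespace quota.
    dfs.setQuota(dir, FSConstants.QUOTA_RESET, FSConstants.QUOTA_DONT_SET);
  }
}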
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + The message for the exception specifies the directory where the quota + was violated and actual quotas.]]> +
    +
    + + + + + + + + + + + + +
The balancer is a tool that balances disk space usage on an HDFS cluster + when some datanodes become full or when new empty nodes join the cluster. + The tool is deployed as an application program that can be run by the + cluster administrator on a live HDFS cluster while applications are + adding and deleting files. + +

    SYNOPSIS +

    + To start:
+      bin/start-balancer.sh [-threshold <threshold>]
+      Example: bin/start-balancer.sh
+                     start the balancer with a default threshold of 10%
+               bin/start-balancer.sh -threshold 5
+                     start the balancer with a threshold of 5%
+ To stop:
+      bin/stop-balancer.sh
    + 
    + +

    DESCRIPTION +

The threshold parameter is a fraction in the range of (0%, 100%) with a + default value of 10%. The threshold sets a target for whether the cluster + is balanced. A cluster is balanced if, for each datanode, the utilization + of the node (ratio of used space at the node to total capacity of the node) + differs from the utilization of the cluster (ratio of used space in the cluster + to total capacity of the cluster) by no more than the threshold value. + The smaller the threshold, the more balanced a cluster will become. + It takes more time to run the balancer for small threshold values. + Also, for a very small threshold the cluster may not be able to reach the + balanced state when applications write and delete files concurrently. + +

    The tool moves blocks from highly utilized datanodes to poorly + utilized datanodes iteratively. In each iteration a datanode moves or + receives no more than the lesser of 10G bytes or the threshold fraction + of its capacity. Each iteration runs no more than 20 minutes. + At the end of each iteration, the balancer obtains updated datanodes + information from the namenode. + +

    A system property that limits the balancer's use of bandwidth is + defined in the default configuration file: +

+ <property>
+   <name>dfs.balance.bandwidthPerSec</name>
+   <value>1048576</value>
+   <description>Specifies the maximum bandwidth that each datanode
+   can utilize for the balancing purpose in terms of the number of bytes
+   per second.</description>
+ </property>
+
    + +

    This property determines the maximum speed at which a block will be + moved from one datanode to another. The default value is 1MB/s. The higher + the bandwidth, the faster a cluster can reach the balanced state, + but with greater competition with application processes. If an + administrator changes the value of this property in the configuration + file, the change is observed when HDFS is next restarted. + +
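Where the balancer is driven from Java rather than the shell scripts, the same key can be set on a Configuration object. A minimal sketch, assuming only the stock Configuration API; the 4 MB/s figure is an arbitrary example, and on a running cluster the value only takes effect after the restart mentioned above.

// Hedged sketch: overriding the balancing bandwidth programmatically.
import org.apache.hadoop.conf.Configuration;

public class BalancerBandwidth {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // 4 MB/s instead of the 1 MB/s default documented above.
    conf.setLong("dfs.balance.bandwidthPerSec", 4L * 1024 * 1024);
    System.out.println(conf.getLong("dfs.balance.bandwidthPerSec", 1048576L));
  }
}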

MONITORING BALANCER PROGRESS +

    After the balancer is started, an output file name where the balancer + progress will be recorded is printed on the screen. The administrator + can monitor the running of the balancer by reading the output file. + The output shows the balancer's status iteration by iteration. In each + iteration it prints the starting time, the iteration number, the total + number of bytes that have been moved in the previous iterations, + the total number of bytes that are left to move in order for the cluster + to be balanced, and the number of bytes that are being moved in this + iteration. Normally "Bytes Already Moved" is increasing while "Bytes Left + To Move" is decreasing. + +

    Running multiple instances of the balancer in an HDFS cluster is + prohibited by the tool. + +

    The balancer automatically exits when any of the following five + conditions is satisfied: +

      +
    1. The cluster is balanced; +
    2. No block can be moved; +
    3. No block has been moved for five consecutive iterations; +
    4. An IOException occurs while communicating with the namenode; +
    5. Another balancer is running. +
    + +

Upon exit, a balancer returns an exit code and prints one of the + following messages to the output file, corresponding to the above exit + reasons: +

      +
    1. The cluster is balanced. Exiting +
    2. No block can be moved. Exiting... +
    3. No block has been moved for 3 iterations. Exiting... +
    4. Received an IO exception: failure reason. Exiting... +
    5. Another balancer is running. Exiting... +
    + +

    The administrator can interrupt the execution of the balancer at any + time by running the command "stop-balancer.sh" on the machine where the + balancer is running.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if exclusive locks are supported or + false otherwise. + @throws IOException + @see StorageDirectory#lock()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Local storage information is stored in a separate file VERSION. + It contains type of the node, + the storage layout version, the namespace id, and + the fs state creation time. +

    + Local storage can reside in multiple directories. + Each directory should contain the same VERSION file as the others. + During startup Hadoop servers (name-node and data-nodes) read their local + storage information from them. +

    + The servers hold a lock for each storage directory while they run so that + other nodes were not able to startup sharing the same storage. + The locks are released when the servers stop (normally or abnormally).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Removes contents of the current directory and creates an empty directory. + + This does not fully format storage directory. + It cannot write the version file since it should be written last after + all other storage type dependent files are written. + Derived storage is responsible for setting specific storage values and + writing the version file to disk. + + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Locking is not supported by all file systems. + E.g., NFS does not consistently support exclusive locks. + +

    If locking is supported we guarantee exculsive access to the + storage directory. Otherwise, no guarantee is given. + + @throws IOException if locking fails]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + stream of bytes (of BLOCK_SIZE or less) + + This info is stored on a local disk. The DataNode + reports the table's contents to the NameNode upon startup + and every so often afterwards. + + DataNodes spend their lives in an endless loop of asking + the NameNode for something to do. A NameNode cannot connect + to a DataNode directly; a NameNode simply returns values from + functions invoked by a DataNode. + + DataNodes maintain an open server socket so that client code + or other DataNodes can read/write data. The host/port for + this server is reported to the NameNode, which then sends that + information to clients or other DataNodes that might be interested.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @see Storage]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + " + + Many of the activity metrics are sampled and averaged on an interval + which can be specified in the metrics config file. +

+ For the metrics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most metrics contexts do. + The default Null metrics context, however, does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-metrics.properties file: +

    +        dfs.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        dfs.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically + + + + Impl details: We use a dynamic mbean that gets the list of the metrics + from the metrics registry passed as an argument to the constructor]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #blocksRead}.inc()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Data Node runtime statistic info is report in another MBean + @see org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeStatisticsMBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + TreeSet]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + :/data[/] HTTP/1.1 + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + =0.16)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The purpose of registration is to identify whether the new datanode + serves a new data storage, and will report new data block copies, + which the namenode was not aware of; or the datanode is a replacement + node for the data storage that was previously served by a different + or the same (in terms of host:port) datanode. + The data storages are distinguished by their storageIDs. When a new + data storage is reported the namenode issues a new unique storageID. +

    + Finally, the namenode returns its namespaceID as the registrationID + for the datanodes. + namespaceID is a persistent attribute of the name space. + The registrationID is checked every time the datanode is communicating + with the namenode. + Datanodes with inappropriate registrationID are rejected. + If the namenode stops, and then restarts it can restore its + namespaceID and will continue serving the datanodes that has previously + registered with the namenode without restarting the whole cluster. + + @see org.apache.hadoop.hdfs.server.datanode.DataNode#register()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + blocklist) and (block-->machinelist) tables.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + no further work needed here. + 2. Removed from hosts --> mark AdminState as decommissioned. + 3. Added to exclude --> start decommission. + 4. Removed from exclude --> stop decommission.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + blocklist (kept on disk, logged) + 2) Set of all valid blocks (inverted #1) + 3) block --> machinelist (kept in memory, rebuilt dynamically from reports) + 4) machine --> blocklist (inverted #2) + 5) LRU cache of updated-heartbeat machines]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + :/listPaths[/][[&option]*] HTTP/1.1 + } + + Where option (default) in: + recursive ("no") + filter (".*") + exclude ("\..*\.crc") + + Response: A flat list of files/directories in the following format: + {@code +

    + + + + }]]> +
    + + + + + + +
    + + + + + + + + + + + + + The name-node can be started with one of the following startup options: +
      +
• {@link StartupOption#REGULAR REGULAR} - normal name node startup
• {@link StartupOption#FORMAT FORMAT} - format name node
• {@link StartupOption#UPGRADE UPGRADE} - start the cluster + upgrade and create a snapshot of the current file system state
• {@link StartupOption#ROLLBACK ROLLBACK} - roll the + cluster back to the previous state
+ The option is passed via configuration field: + dfs.namenode.startup + + The conf will be modified to reflect the actual ports on which + the NameNode is up and running if the user passes the port as + zero in the conf. + + @param conf configuration + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + datanode whose + total size is size + + @param datanode on which blocks are located + @param size total size of blocks]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + blocksequence (namespace) + 2) block->machinelist ("inodes") + + The first table is stored on disk and is very precious. + The second table is rebuilt every time the NameNode comes + up. + + 'NameNode' refers to both this class as well as the 'NameNode server'. + The 'FSNamesystem' class actually performs most of the filesystem + management. The majority of the 'NameNode' class itself is concerned + with exposing the IPC interface and the http server to the outside world, + plus some configuration management. + + NameNode implements the ClientProtocol interface, which allows + clients to ask for DFS services. ClientProtocol is not + designed for direct use by authors of DFS client code. End-users + should instead use the org.apache.nutch.hadoop.fs.FileSystem class. + + NameNode also implements the DatanodeProtocol interface, used by + DataNode programs that actually store DFS data blocks. These + methods are invoked repeatedly and automatically by all the + DataNodes in a DFS deployment. + + NameNode also implements the NamenodeProtocol interface, used by + secondary namenodes or rebalancing processes to get partial namenode's + state, for example partial blocksMap etc.]]> + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The tool scans all files and directories, starting from an indicated + root path. The following abnormal conditions are detected and handled:

    +
      +
• files with blocks that are completely missing from all datanodes.
  + In this case the tool can perform one of the following actions: +
    • none ({@link #FIXING_NONE})
    • move corrupted files to /lost+found directory on DFS + ({@link #FIXING_MOVE}). Remaining data blocks are saved as + block chains, representing the longest consecutive series of valid blocks.
    • delete corrupted files ({@link #FIXING_DELETE})
• detect files with under-replicated or over-replicated blocks
+ Additionally, the tool collects detailed overall DFS statistics, and + optionally can print detailed statistics on block locations and replication + factors of each file.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Name Node runtime activity statistic info is report in another MBean + @see org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeActivityMBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

{@link #filesTotal}.set()]]> + + For the metrics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most metrics contexts do. + The default Null metrics context, however, does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-metrics.properties file: +

    +        dfs.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        dfs.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically + + + + Impl details: We use a dynamic mbean that gets the list of the metrics + from the metrics registry passed as an argument to the constructor]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #syncs}.inc()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + datanode + whose total size is equal to size + @param datanode a data node + @param size requested size + @return a list of blocks & their locations + @throws RemoteException if size is less than or equal to 0 or + datanode does not exist]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The tool scans all files and directories, starting from an indicated + root path. The following abnormal conditions are detected and handled:

    +
      +
• files with blocks that are completely missing from all datanodes.
  + In this case the tool can perform one of the following actions: +
    • none ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_NONE})
    • move corrupted files to /lost+found directory on DFS + ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_MOVE}). Remaining data blocks are saved as + block chains, representing the longest consecutive series of valid blocks.
    • delete corrupted files ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_DELETE})
• detect files with under-replicated or over-replicated blocks
+ Additionally, the tool collects detailed overall DFS statistics, and + optionally can print detailed statistics on block locations and replication + factors of each file. + The tool also provides an option to filter open files during the scan.]]> +
    +
    + +
    + +
    diff --git a/aarch64/share/hadoop/hdfs/jdiff/hadoop-hdfs_0.21.0.xml b/aarch64/share/hadoop/hdfs/jdiff/hadoop-hdfs_0.21.0.xml new file mode 100644 index 0000000..7fab725 --- /dev/null +++ b/aarch64/share/hadoop/hdfs/jdiff/hadoop-hdfs_0.21.0.xml @@ -0,0 +1,16220 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileNotFoundException is file does not exist. + @throws UnresolvedLinkException if the path contains a symlink.]]> + + + + + + + + + + FileNotFoundException is file does not exist. + @throws UnresolvedLinkException if the path contains a symlink.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @throws IOException]]> + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. This is loosely modelled after +Google's GFS.

    + +

    The most important difference is that unlike GFS, Hadoop DFS files +have strictly one writer at any one time. Bytes are always appended +to the end of the writer's stream. There is no notion of "record appends" +or "mutations" that are then checked or reordered. Writers simply emit +a byte stream. That byte stream is guaranteed to be stored in the +order written.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return {@link LocatedBlocks} which contains + file length, blocks and their locations. + DataNode locations for each block are sorted by + the distance to the client's address. +

    + The client will then have to contact + one of the indicated DataNodes to obtain the actual data. + + @param src file name + @param offset range start offset + @param length range length + @return file length and array of blocks with their locations + @throws IOException + @throws UnresolvedLinkException if the path contains a symlink. + @throws FileNotFoundException if the path does not exist.]]> + + + + + + + + + + + + + + + + + + + + + + + This will create an empty file specified by the source path. + The path should reflect a full path originated at the root. + The name-node does not have a notion of "current" directory for a client. +

    + Once created, the file is visible and available for read to other clients. + Although, other clients cannot {@link #delete(String, boolean)}, re-create or + {@link #rename(String, String)} it until the file is completed + or explicitly as a result of lease expiration. +

    + Blocks have a maximum size. Clients that intend to create + multi-block files must also use {@link #addBlock(String, String, Block, DatanodeInfo[])}. + + @param src path of the file being created. + @param masked masked permission. + @param clientName name of the current client. + @param flag indicates whether the file should be + overwritten if it already exists or create if it does not exist or append. + @param createParent create missing parent directory if true + @param replication block replication factor. + @param blockSize maximum block size. + + @throws AccessControlException if permission to create file is + denied by the system. As usually on the client side the exception will + be wrapped into {@link org.apache.hadoop.ipc.RemoteException}. + @throws QuotaExceededException if the file creation violates + any quota restriction + @throws IOException if other errors occur. + @throws UnresolvedLinkException if the path contains a symlink. + @throws AlreadyBeingCreatedException if the path does not exist. + @throws NSQuotaExceededException if the namespace quota is exceeded.]]> + + + + + + + + + + + + + + + + + + + The NameNode sets replication to the new value and returns. + The actual block replication is not expected to be performed during + this method call. The blocks will be populated or removed in the + background as the result of the routine block maintenance procedures. + + @param src file name + @param replication new replication + @throws IOException + @return true if successful; + false if file does not exist or is a directory + @throws UnresolvedLinkException if the path contains a symlink.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • Fails if src is a file and dst is a directory. +
  • Fails if src is a directory and dst is a file. +
  • Fails if the parent of dst does not exist or is a file. + +

    + Without OVERWRITE option, rename fails if the dst already exists. + With OVERWRITE option, rename overwrites the dst, if it is a file + or an empty directory. Rename fails if dst is a non-empty directory. +

    + This implementation of rename is atomic. +

    + @param src existing file or directory name. + @param dst new name. + @param options Rename options + @throws IOException if rename failed + @throws UnresolvedLinkException if the path contains a symlink.]]> + + + + + + + + + Any blocks belonging to the deleted files will be garbage-collected. + + @param src existing name. + @return true only if the existing file or directory was actually removed + from the file system. + @throws UnresolvedLinkException if the path contains a symlink. + @deprecated use {@link #delete(String, boolean)} istead.]]> + + + + + + + + + + same as delete but provides a way to avoid accidentally + deleting non empty directories programmatically. + @param src existing name + @param recursive if true deletes a non empty directory recursively, + else throws an exception. + @return true only if the existing file or directory was actually removed + from the file system. + @throws UnresolvedLinkException if the path contains a symlink.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + So, the NameNode will revoke the locks and live file-creates + for clients that it thinks have died. A client tells the + NameNode that it is still alive by periodically calling + renewLease(). If a certain amount of time passes since + the last call to renewLease(), the NameNode assumes the + client has died. + @throws UnresolvedLinkException if the path contains a symlink.]]> + + + + + + +
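The rename-with-options behaviour described above is reachable from applications through the FileContext API available from 0.21 onward. A minimal sketch with placeholder paths; the default file context is assumed to be backed by HDFS.

// Hedged sketch of rename with the OVERWRITE option described above.
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;

public class RenameOverwrite {
  public static void main(String[] args) throws Exception {
    FileContext fc = FileContext.getFileContext();   // default file context
    // Overwrites dst if it is a file or an empty directory; atomic per the doc.
    fc.rename(new Path("/tmp/src.txt"), new Path("/tmp/dst.txt"),
              Options.Rename.OVERWRITE);
  }
}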

• [0] contains the total storage capacity of the system, in bytes.
• [1] contains the total used space of the system, in bytes.
• [2] contains the available storage of the system, in bytes.
• [3] contains number of under replicated blocks in the system.
• [4] contains number of blocks with a corrupt replica.
• [5] contains number of blocks without any good replicas left.
+ Use public constants like {@link #GET_STATS_CAPACITY_IDX} in place of + actual numbers to index into the array.]]> +
    +
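On the client side, the first three entries of that array surface through FileSystem#getStatus. A minimal sketch, assuming the FsStatus accessors available from 0.21 onward; the block counts in the remaining slots are only visible through ClientProtocol#getStats itself.

// Hedged sketch: reading capacity/used/remaining, which mirror the first three
// entries of the long[] documented above.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsStatus;

public class ClusterStats {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    FsStatus status = fs.getStatus();
    System.out.println("capacity  = " + status.getCapacity());
    System.out.println("used      = " + status.getUsed());
    System.out.println("remaining = " + status.getRemaining());
  }
}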
    + + + + + + + + + + + + + + + + + + + + + Safe mode is a name node state when it +
1. does not accept changes to name space (read-only), and
2. does not replicate or delete blocks.
    + +

    + Safe mode is entered automatically at name node startup. + Safe mode can also be entered manually using + {@link #setSafeMode(FSConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_GET)}. +

    + At startup the name node accepts data node reports collecting + information about block locations. + In order to leave safe mode it needs to collect a configurable + percentage called threshold of blocks, which satisfy the minimal + replication condition. + The minimal replication condition is that each block must have at least + dfs.namenode.replication.min replicas. + When the threshold is reached the name node extends safe mode + for a configurable amount of time + to let the remaining data nodes to check in before it + will start replicating missing blocks. + Then the name node leaves safe mode. +

    + If safe mode is turned on manually using + {@link #setSafeMode(FSConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_ENTER)} + then the name node stays in safe mode until it is manually turned off + using {@link #setSafeMode(FSConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_LEAVE)}. + Current state of the name node can be verified using + {@link #setSafeMode(FSConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_GET)} +

    Configuration parameters:

    + dfs.safemode.threshold.pct is the threshold parameter.
    + dfs.safemode.extension is the safe mode extension parameter.
    + dfs.namenode.replication.min is the minimal replication parameter. + +

    Special cases:

    + The name node does not enter safe mode at startup if the threshold is + set to 0 or if the name space is empty.
    + If the threshold is set to 1 then all blocks need to have at least + minimal replication.
    + If the threshold value is greater than 1 then the name node will not be + able to turn off safe mode automatically.
    + Safe mode can always be turned off manually. + + @param action
    • 0 leave safe mode;
    • +
    • 1 enter safe mode;
    • +
    • 2 get safe mode state.
    + @return
    • 0 if the safe mode is OFF or
    • +
    • 1 if the safe mode is ON.
    + @throws IOException]]> +
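As a usage illustration only (the exact class holding SafeModeAction differs between releases; the names below follow the javadoc above), querying the safe mode state from a client:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.hdfs.DistributedFileSystem;
  import org.apache.hadoop.hdfs.protocol.FSConstants;

  final class SafeModeSketch {
    public static void main(String[] args) throws Exception {
      // Assumes fs.defaultFS points at an HDFS cluster.
      DistributedFileSystem dfs =
          (DistributedFileSystem) FileSystem.get(new Configuration());
      // SAFEMODE_GET only reports the state; SAFEMODE_ENTER/SAFEMODE_LEAVE change it
      // and require superuser privilege.
      boolean on = dfs.setSafeMode(FSConstants.SafeModeAction.SAFEMODE_GET);
      System.out.println("Safe mode is " + (on ? "ON" : "OFF"));
    }
  }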
    +
    + + + + + Saves current namespace into storage directories and reset edits log. + Requires superuser privilege and safe mode. + + @throws AccessControlException if the superuser privilege is violated. + @throws IOException if image creation failed.]]> + + + + + + + + sets flag to enable restore of failed storage replicas + + @throws AccessControlException if the superuser privilege is violated.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + The quota can have three types of values : (1) 0 or more will set + the quota to that value, (2) {@link FSConstants#QUOTA_DONT_SET} implies + the quota will not be changed, and (3) {@link FSConstants#QUOTA_RESET} + implies the quota will be reset. Any other value is a runtime error. + @throws UnresolvedLinkException if the path contains a symlink. + @throws FileNotFoundException if the path is a file or + does not exist + @throws QuotaExceededException if the directory size + is greater than the given quota]]> +
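A minimal sketch of the three value kinds described above, using the public DistributedFileSystem wrapper (the directory path is hypothetical):

  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hdfs.DistributedFileSystem;
  import org.apache.hadoop.hdfs.protocol.FSConstants;

  final class QuotaSketch {
    // Caps the directory at 1000 names, leaves its disk-space quota unchanged.
    static void limitNames(DistributedFileSystem dfs) throws java.io.IOException {
      dfs.setQuota(new Path("/projects/reports"),
                   1000L,                          // (1) a concrete namespace quota
                   FSConstants.QUOTA_DONT_SET);    // (2) do not touch the space quota
      // (3) FSConstants.QUOTA_RESET would instead clear a previously set quota.
    }
  }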
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + The message for the exception specifies the directory where the quota + was violated and actual quotas. Specific message is generated in the + corresponding Exception class: + DSQuotaExceededException or + NSQuotaExceededException]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The balancer is a tool that balances disk space usage on an HDFS cluster + when some datanodes become full or when new empty nodes join the cluster. + The tool is deployed as an application program that can be run by the + cluster administrator on a live HDFS cluster while applications + adding and deleting files. + +

    SYNOPSIS +

    + To start:
+      bin/start-balancer.sh [-threshold <threshold>]
+      Example: bin/start-balancer.sh
+                     start the balancer with a default threshold of 10%
+               bin/start-balancer.sh -threshold 5
+                     start the balancer with a threshold of 5%
+ To stop:
+      bin/stop-balancer.sh
    + 
    + +

    DESCRIPTION +

The threshold parameter is a fraction in the range of (0%, 100%) with a + default value of 10%. The threshold sets a target for whether the cluster + is balanced. A cluster is balanced if for each datanode, the utilization + of the node (ratio of used space at the node to total capacity of the node) + differs from the utilization of the cluster (ratio of used space in the cluster + to total capacity of the cluster) by no more than the threshold value. + The smaller the threshold, the more balanced a cluster will become. + It takes more time to run the balancer for small threshold values. + Also, for a very small threshold, the cluster may not be able to reach the + balanced state when applications write and delete files concurrently. + +
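As a concrete illustration of this criterion (the numbers are invented for the example), a small sketch that only restates the definition above; the real Balancer keeps far more state:

  final class BalanceCheckSketch {
    // True when the node's utilization is within `threshold` of the cluster's.
    static boolean isBalanced(double nodeUsed, double nodeCapacity,
                              double clusterUsed, double clusterCapacity,
                              double threshold) {
      double nodeUtil = nodeUsed / nodeCapacity;
      double clusterUtil = clusterUsed / clusterCapacity;
      return Math.abs(nodeUtil - clusterUtil) <= threshold;
    }

    public static void main(String[] args) {
      // Cluster at 60% utilization, threshold 10%: a node at 75% is over-utilized,
      // a node at 55% is considered balanced.
      System.out.println(isBalanced(75, 100, 600, 1000, 0.10));  // false
      System.out.println(isBalanced(55, 100, 600, 1000, 0.10));  // true
    }
  }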

    The tool moves blocks from highly utilized datanodes to poorly + utilized datanodes iteratively. In each iteration a datanode moves or + receives no more than the lesser of 10G bytes or the threshold fraction + of its capacity. Each iteration runs no more than 20 minutes. + At the end of each iteration, the balancer obtains updated datanodes + information from the namenode. + +

    A system property that limits the balancer's use of bandwidth is + defined in the default configuration file: +

+ <property>
+   <name>dfs.balance.bandwidthPerSec</name>
+   <value>1048576</value>
+   <description> Specifies the maximum bandwidth that each datanode
+ can utilize for the balancing purpose, in terms of the number of bytes
+ per second. </description>
+ </property>
+ 
    + +

    This property determines the maximum speed at which a block will be + moved from one datanode to another. The default value is 1MB/s. The higher + the bandwidth, the faster a cluster can reach the balanced state, + but with greater competition with application processes. If an + administrator changes the value of this property in the configuration + file, the change is observed when HDFS is next restarted. + +

MONITORING BALANCER PROGRESS +

    After the balancer is started, an output file name where the balancer + progress will be recorded is printed on the screen. The administrator + can monitor the running of the balancer by reading the output file. + The output shows the balancer's status iteration by iteration. In each + iteration it prints the starting time, the iteration number, the total + number of bytes that have been moved in the previous iterations, + the total number of bytes that are left to move in order for the cluster + to be balanced, and the number of bytes that are being moved in this + iteration. Normally "Bytes Already Moved" is increasing while "Bytes Left + To Move" is decreasing. + +

    Running multiple instances of the balancer in an HDFS cluster is + prohibited by the tool. + +

    The balancer automatically exits when any of the following five + conditions is satisfied: +

      +
    1. The cluster is balanced; +
    2. No block can be moved; +
    3. No block has been moved for five consecutive iterations; +
    4. An IOException occurs while communicating with the namenode; +
    5. Another balancer is running. +
    + +

Upon exit, a balancer returns an exit code and prints one of the + following messages to the output file, corresponding to the above exit + reasons: +

      +
    1. The cluster is balanced. Exiting +
    2. No block can be moved. Exiting... +
    3. No block has been moved for 3 iterations. Exiting... +
    4. Received an IO exception: failure reason. Exiting... +
    5. Another balancer is running. Exiting... +
    + +

    The administrator can interrupt the execution of the balancer at any + time by running the command "stop-balancer.sh" on the machine where the + balancer is running.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The block has at least one {@link ReplicaState#FINALIZED} replica, + and is not going to be modified.]]> + + + + + + It has been recently allocated for write or append.]]> + + + + + + When a file lease expires its last block may not be {@link #COMPLETE} + and needs to go through a recovery procedure, + which synchronizes the existing replicas contents.]]> + + + + + + The client reported that all bytes are written to data-nodes + with the given generation stamp and block length, but no + {@link ReplicaState#FINALIZED} + replicas has yet been reported by data-nodes themselves.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if exclusive locks are supported or + false otherwise. + @throws IOException + @see StorageDirectory#lock()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Local storage information is stored in a separate file VERSION. + It contains type of the node, + the storage layout version, the namespace id, and + the fs state creation time. +

    + Local storage can reside in multiple directories. + Each directory should contain the same VERSION file as the others. + During startup Hadoop servers (name-node and data-nodes) read their local + storage information from them. +

    + The servers hold a lock for each storage directory while they run so that + other nodes were not able to startup sharing the same storage. + The locks are released when the servers stop (normally or abnormally).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Removes contents of the current directory and creates an empty directory. + + This does not fully format storage directory. + It cannot write the version file since it should be written last after + all other storage type dependent files are written. + Derived storage is responsible for setting specific storage values and + writing the version file to disk. + + @throws IOException]]> + + + + + + + + + + +

  • node type
  • +
  • layout version
  • +
  • namespaceID
  • +
  • fs state creation time
  • +
  • other fields specific for this node type
  • + + The version file is always written last during storage directory updates. + The existence of the version file indicates that all other files have + been successfully written in the storage directory, the storage is valid + and does not need to be recovered. + + @return the version file path]]> +
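For orientation, a VERSION file carrying the fields listed above typically looks like the following; every value here is an invented example, and newer releases add further fields (for instance a cluster or block-pool id):

  #Wed Feb 12 16:59:54 GMT 2014
  namespaceID=463031076
  cTime=0
  storageType=NAME_NODE
  layoutVersion=-40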
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Locking is not supported by all file systems. + E.g., NFS does not consistently support exclusive locks. + +

    If locking is supported we guarantee exculsive access to the + storage directory. Otherwise, no guarantee is given. + + @throws IOException if locking fails]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Assigned to the file system at formatting and never changes after that. + Shared by all file system components.]]> + + + + + + Modified during upgrades.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + stream of bytes (of BLOCK_SIZE or less) + + This info is stored on a local disk. The DataNode + reports the table's contents to the NameNode upon startup + and every so often afterwards. + + DataNodes spend their lives in an endless loop of asking + the NameNode for something to do. A NameNode cannot connect + to a DataNode directly; a NameNode simply returns values from + functions invoked by a DataNode. + + DataNodes maintain an open server socket so that client code + or other DataNodes can read/write data. The host/port for + this server is reported to the NameNode, which then sends that + information to clients or other DataNodes that might be interested.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @see Storage]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • If the block file is missing, delete the block from volumeMap
  • +
  • If the block file exists and the block is missing in volumeMap, + add the block to volumeMap
  • +
  • If generation stamp does not match, then update the block with right + generation stamp
  • +
  • If the block length in memory does not match the actual block file length + then mark the block as corrupt and update the block length in memory
  • +
  • If the file in {@link ReplicaInfo} does not match the file on + the disk, update {@link ReplicaInfo} with the correct file
  • + + + @param blockId Block that differs + @param diskFile Block file on the disk + @param diskMetaFile Metadata file from on the disk + @param vol Volume of the block file]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + -" + + Many of the activity metrics are sampled and averaged on an interval + which can be specified in the metrics config file. +

+ For the metrics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most metrics contexts do. + The default Null metrics context, however, does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-metrics.properties file: +

    +        dfs.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        dfs.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically + + + + Impl details: We use a dynamic mbean that gets the list of the metrics + from the metrics registry passed as an argument to the constructor]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #blocksRead}.inc()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Data Node runtime statistic info is report in another MBean + @see org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeStatisticsMBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Backup node can play two roles. +

      +
    1. {@link NamenodeRole#CHECKPOINT} node periodically creates checkpoints, + that is downloads image and edits from the active node, merges them, and + uploads the new image back to the active.
    2. +
    3. {@link NamenodeRole#BACKUP} node keeps its namespace in sync with the + active node, and periodically creates checkpoints by simply saving the + namespace image to local disk(s).
    4. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + blocklist) and (block-->machinelist) tables.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + TreeSet]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + :/data[/] HTTP/1.1 + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + =0.16)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The purpose of registration is to identify whether the new datanode + serves a new data storage, and will report new data block copies, + which the namenode was not aware of; or the datanode is a replacement + node for the data storage that was previously served by a different + or the same (in terms of host:port) datanode. + The data storages are distinguished by their storageIDs. When a new + data storage is reported the namenode issues a new unique storageID. +

    + Finally, the namenode returns its namespaceID as the registrationID + for the datanodes. + namespaceID is a persistent attribute of the name space. + The registrationID is checked every time the datanode is communicating + with the namenode. + Datanodes with inappropriate registrationID are rejected. + If the namenode stops, and then restarts it can restore its + namespaceID and will continue serving the datanodes that has previously + registered with the namenode without restarting the whole cluster. + + @see org.apache.hadoop.hdfs.server.datanode.DataNode#register()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + blocklist) and (block-->machinelist) tables.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + no further work needed here. + 2. Removed from hosts --> mark AdminState as decommissioned. + 3. Added to exclude --> start decommission. + 4. Removed from exclude --> stop decommission.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key=value pairs to be written for the following properties: + + ugi=<ugi in RPC> + ip=<remote IP> + cmd=<command> + src=<src path> + dst=<dst path (optional)> + perm=<permissions (optional)> + ]]> + + + + + + + + + + + + blocklist (kept on disk, logged) + 2) Set of all valid blocks (inverted #1) + 3) block --> machinelist (kept in memory, rebuilt dynamically from reports) + 4) machine --> blocklist (inverted #2) + 5) LRU cache of updated-heartbeat machines]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + :/listPaths[/][[&option]*] HTTP/1.1 + } + + Where option (default) in: + recursive ("no") + filter (".*") + exclude ("\..*\.crc") + + Response: A flat list of files/directories in the following format: + {@code +

    + + + + }]]> +
    +
    + + + + + +
    + + + + + + + + + + The name-node can be started with one of the following startup options: +
      +
    • {@link StartupOption#REGULAR REGULAR} - normal name node startup
    • +
    • {@link StartupOption#FORMAT FORMAT} - format name node
    • +
    • {@link StartupOption#BACKUP BACKUP} - start backup node
    • +
    • {@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node
    • +
    • {@link StartupOption#UPGRADE UPGRADE} - start the cluster + upgrade and create a snapshot of the current file system state
    • +
    • {@link StartupOption#ROLLBACK ROLLBACK} - roll the + cluster back to the previous state
    • +
    • {@link StartupOption#FINALIZE FINALIZE} - finalize + previous upgrade
    • +
    • {@link StartupOption#IMPORT IMPORT} - import checkpoint
    • +
+ The option is passed via configuration field: + dfs.namenode.startup + + The conf will be modified to reflect the actual ports on which + the NameNode is up and running if the user passes the port as + zero in the conf. + + @param conf configuration + @throws IOException]]> +
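In day-to-day use these options reach the name node through the startup command rather than by editing the configuration directly; typical invocations (matching the option list above, and assuming the bin/hdfs launcher shipped in this distribution) are:

  bin/hdfs namenode                      (regular startup)
  bin/hdfs namenode -format              (format a new name node)
  bin/hdfs namenode -upgrade             (upgrade and snapshot the current state)
  bin/hdfs namenode -rollback            (roll back to the pre-upgrade state)
  bin/hdfs namenode -finalize            (finalize a previous upgrade)
  bin/hdfs namenode -importCheckpoint    (import a checkpoint)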
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + blocksequence (namespace) + 2) block->machinelist ("inodes") + + The first table is stored on disk and is very precious. + The second table is rebuilt every time the NameNode comes + up. + + 'NameNode' refers to both this class as well as the 'NameNode server'. + The 'FSNamesystem' class actually performs most of the filesystem + management. The majority of the 'NameNode' class itself is concerned + with exposing the IPC interface and the http server to the outside world, + plus some configuration management. + + NameNode implements the ClientProtocol interface, which allows + clients to ask for DFS services. ClientProtocol is not + designed for direct use by authors of DFS client code. End-users + should instead use the org.apache.nutch.hadoop.fs.FileSystem class. + + NameNode also implements the DatanodeProtocol interface, used by + DataNode programs that actually store DFS data blocks. These + methods are invoked repeatedly and automatically by all the + DataNodes in a DFS deployment. + + NameNode also implements the NamenodeProtocol interface, used by + secondary namenodes or rebalancing processes to get partial namenode's + state, for example partial blocksMap etc.]]> + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The tool scans all files and directories, starting from an indicated + root path. The following abnormal conditions are detected and handled:

    +
      +
    • files with blocks that are completely missing from all datanodes.
      + In this case the tool can perform one of the following actions: +
        +
      • none ({@link #FIXING_NONE})
      • +
• move corrupted files to /lost+found directory on DFS + ({@link #FIXING_MOVE}). Remaining data blocks are saved as + block chains, representing the longest consecutive series of valid blocks.
      • +
      • delete corrupted files ({@link #FIXING_DELETE})
      • +
      +
    • +
    • detect files with under-replicated or over-replicated blocks
    • +
+ Additionally, the tool collects detailed overall DFS statistics, and + optionally can print detailed statistics on block locations and replication + factors of each file.]]> +
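For reference, these modes are normally selected from the command line; typical invocations (the path is hypothetical, options as described above) look like:

  bin/hdfs fsck /user/alice                             (report only, no fixing)
  bin/hdfs fsck /user/alice -move                       (move corrupted files to /lost+found)
  bin/hdfs fsck /user/alice -delete                     (delete corrupted files)
  bin/hdfs fsck /user/alice -files -blocks -locations   (detailed per-file block report)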
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Name Node runtime activity statistic info is report in another MBean + @see org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeActivityMBean]]> + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #filesTotal}.set()]]> + + + + + + + + + + + + For the metrics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most metrics contexts do. + The default Null metrics context however does NOT. So if you aren't + using any other metrics context then you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-meterics.properties file: +

    +        dfs.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        dfs.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically + + + + Impl details: We use a dynamic mbean that gets the list of the metrics + from the metrics registry passed as an argument to the constructor]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #syncs}.inc()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Returned to the backup node by the name-node as a reply to the + {@link NamenodeProtocol#startCheckpoint(NamenodeRegistration)} + request.
    + Contains: +

      +
    • {@link CheckpointSignature} identifying the particular checkpoint
    • +
    • indicator whether the backup image should be discarded before starting + the checkpoint
    • +
• indicator whether the image should be transferred back to the name-node + upon completion of the checkpoint.
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + datanode + whose total size equals size. + + @see org.apache.hadoop.hdfs.server.balancer.Balancer + @param datanode a data node + @param size requested size + @return a list of blocks & their locations + @throws RemoteException if size is less than or equal to 0 or + datanode does not exist]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The tool scans all files and directories, starting from an indicated + root path. The following abnormal conditions are detected and handled:

    +
      +
    • files with blocks that are completely missing from all datanodes.
      + In this case the tool can perform one of the following actions: +
        +
      • none ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_NONE})
      • +
• move corrupted files to /lost+found directory on DFS + ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_MOVE}). Remaining data blocks are saved as + block chains, representing the longest consecutive series of valid blocks.
      • +
      • delete corrupted files ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_DELETE})
      • +
      +
    • +
    • detect files with under-replicated or over-replicated blocks
    • +
+ Additionally, the tool collects detailed overall DFS statistics, and + optionally can print detailed statistics on block locations and replication + factors of each file. + The tool also provides an option to filter open files during the scan.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    diff --git a/aarch64/share/hadoop/hdfs/jdiff/hadoop-hdfs_0.22.0.xml b/aarch64/share/hadoop/hdfs/jdiff/hadoop-hdfs_0.22.0.xml new file mode 100644 index 0000000..cb0d4f0 --- /dev/null +++ b/aarch64/share/hadoop/hdfs/jdiff/hadoop-hdfs_0.22.0.xml @@ -0,0 +1,18589 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    block
    +
    The hdfs block, typically large (~64MB). +
    +
    chunk
    +
A block is divided into chunks, each of which comes with a checksum. + We want transfers to be chunk-aligned, to be able to + verify checksums. +
    +
    packet
    +
    A grouping of chunks used for transport. It contains a + header, followed by checksum data, followed by real data. +
    + + Please see DataNode for the RPC specification.]]> +
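To make the sizes concrete, a small arithmetic sketch with commonly used defaults (512-byte checksum chunks, a 4-byte CRC per chunk, 64 KB of data per packet; all of these are configurable, the numbers are only illustrative):

  final class PacketMathSketch {
    public static void main(String[] args) {
      int bytesPerChunk   = 512;          // data bytes covered by one checksum
      int checksumSize    = 4;            // CRC bytes per chunk
      int dataPerPacket   = 64 * 1024;    // data bytes carried by one packet
      int chunksPerPacket = dataPerPacket / bytesPerChunk;    // 128 chunks
      int checksumBytes   = chunksPerPacket * checksumSize;   // 512 checksum bytes
      System.out.println(chunksPerPacket + " chunks and "
          + checksumBytes + " checksum bytes per packet");
    }
  }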
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + replication and blockSize and null + progress.]]> + + + + + + + + + replication and blockSize.]]> + + + + + + + + + + progress.]]> + + + + + + + + + + + + + + + + + + + + + + + permission + {@link FsPermission#getDefault()}. + + @param src File name + @param overwrite overwrite an existing file if true + @param replication replication factor for the file + @param blockSize maximum block size + @param progress interface for reporting client progress + @param buffersize underlying buffersize + + @return output stream]]> + + + + + + + + + + + + + createParent set to true.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @throws IOException + @deprecated use {@link #getDelegationToken(String)}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. This is loosely modelled after +Google's GFS.

    + +

    The most important difference is that unlike GFS, Hadoop DFS files +have strictly one writer at any one time. Bytes are always appended +to the end of the writer's stream. There is no notion of "record appends" +or "mutations" that are then checked or reordered. Writers simply emit +a byte stream. That byte stream is guaranteed to be stored in the +order written.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return {@link LocatedBlocks} which contains + file length, blocks and their locations. + DataNode locations for each block are sorted by + the distance to the client's address. +

    + The client will then have to contact + one of the indicated DataNodes to obtain the actual data. + + @param src file name + @param offset range start offset + @param length range length + + @return file length and array of blocks with their locations + + @throws AccessControlException If access is denied + @throws FileNotFoundException If file src does not exist + @throws UnresolvedLinkException If src contains a symlink + @throws IOException If an I/O error occurred]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This will create an empty file specified by the source path. + The path should reflect a full path originated at the root. + The name-node does not have a notion of "current" directory for a client. +

    + Once created, the file is visible and available for read to other clients. + Although, other clients cannot {@link #delete(String, boolean)}, re-create or + {@link #rename(String, String)} it until the file is completed + or explicitly as a result of lease expiration. +

    + Blocks have a maximum size. Clients that intend to create + multi-block files must also use + {@link #addBlock(String, String, Block, DatanodeInfo[])} + + @param src path of the file being created. + @param masked masked permission. + @param clientName name of the current client. + @param flag indicates whether the file should be + overwritten if it already exists or create if it does not exist or append. + @param createParent create missing parent directory if true + @param replication block replication factor. + @param blockSize maximum block size. + + @throws AccessControlException If access is denied + @throws AlreadyBeingCreatedException if the path does not exist. + @throws DSQuotaExceededException If file creation violates disk space + quota restriction + @throws FileAlreadyExistsException If file src already exists + @throws FileNotFoundException If parent of src does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of src is not a + directory. + @throws NSQuotaExceededException If file creation violates name space + quota restriction + @throws SafeModeException create not allowed in safemode + @throws UnresolvedLinkException If src contains a symlink + @throws IOException If an I/O error occurred + + RuntimeExceptions: + @throws InvalidPathException Path src is invalid]]> + + + + + + + + + + + + + src is not found + @throws DSQuotaExceededException If append violates disk space quota + restriction + @throws SafeModeException append not allowed in safemode + @throws UnresolvedLinkException If src contains a symlink + @throws IOException If an I/O error occurred. + + RuntimeExceptions: + @throws UnsupportedOperationException if append is not supported]]> + + + + + + + + + + + + + + The NameNode sets replication to the new value and returns. + The actual block replication is not expected to be performed during + this method call. The blocks will be populated or removed in the + background as the result of the routine block maintenance procedures. + + @param src file name + @param replication new replication + + @return true if successful; + false if file does not exist or is a directory + + @throws AccessControlException If access is denied + @throws DSQuotaExceededException If replication violates disk space + quota restriction + @throws FileNotFoundException If file src is not found + @throws SafeModeException not allowed in safemode + @throws UnresolvedLinkException if src contains a symlink + @throws IOException If an I/O error occurred]]> + + + + + + + + + + + + src is not found + @throws SafeModeException not allowed in safemode + @throws UnresolvedLinkException If src contains a symlink + @throws IOException If an I/O error occurred]]> + + + + + + + + + + + + + src is not found + @throws SafeModeException not allowed in safemode + @throws UnresolvedLinkException If src contains a symlink + @throws IOException If an I/O error occurred]]> + + + + + + + + + + + + src is not found + @throws UnresolvedLinkException If src contains a symlink + @throws IOException If an I/O error occurred]]> + + + + + + + + + + + + + + + src is not found + @throws NotReplicatedYetException previous blocks of the file are not + replicated yet. Blocks cannot be added until replication + completes. 
+ @throws SafeModeException create not allowed in safemode + @throws UnresolvedLinkException If src contains a symlink + @throws IOException If an I/O error occurred]]> + + + + + + + + + + + + + src is not found + @throws SafeModeException create not allowed in safemode + @throws UnresolvedLinkException If src contains a symlink + @throws IOException If an I/O error occurred]]> + + + + + + + + + + + + + + + + + + + + + + + + + trg or srcs + contains a symlink]]> + + + + + + + + + + + + + + + + + +

  • Fails if src is a file and dst is a directory. +
  • Fails if src is a directory and dst is a file. +
  • Fails if the parent of dst does not exist or is a file. + +

    + Without OVERWRITE option, rename fails if the dst already exists. + With OVERWRITE option, rename overwrites the dst, if it is a file + or an empty directory. Rename fails if dst is a non-empty directory. +

    + This implementation of rename is atomic. +

    + @param src existing file or directory name. + @param dst new name. + @param options Rename options + + @throws AccessControlException If access is denied + @throws DSQuotaExceededException If rename violates disk space + quota restriction + @throws FileAlreadyExistsException If dst already exists and + options has {@link Rename#OVERWRITE} option + false. + @throws FileNotFoundException If src does not exist + @throws NSQuotaExceededException If rename violates namespace + quota restriction + @throws ParentNotDirectoryException If parent of dst + is not a directory + @throws SafeModeException rename not allowed in safemode + @throws UnresolvedLinkException If src or + dst contains a symlink + @throws IOException If an I/O error occurred]]> + + + + + + + + + Any blocks belonging to the deleted files will be garbage-collected. + + @param src existing name. + @return true only if the existing file or directory was actually removed + from the file system. + @throws UnresolvedLinkException if src contains a symlink. + @deprecated use {@link #delete(String, boolean)} istead.]]> + + + + + + + + + + + + + same as delete but provides a way to avoid accidentally + deleting non empty directories programmatically. + @param src existing name + @param recursive if true deletes a non empty directory recursively, + else throws an exception. + @return true only if the existing file or directory was actually removed + from the file system. + + @throws AccessControlException If access is denied + @throws FileNotFoundException If file src is not found + @throws SafeModeException create not allowed in safemode + @throws UnresolvedLinkException If src contains a symlink + @throws IOException If an I/O error occurred]]> + + + + + + + + + + + + + + + + src already exists + @throws FileNotFoundException If parent of src does not exist + and createParent is false + @throws NSQuotaExceededException If file creation violates quota restriction + @throws ParentNotDirectoryException If parent of src + is not a directory + @throws SafeModeException create not allowed in safemode + @throws UnresolvedLinkException If src contains a symlink + @throws IOException If an I/O error occurred. + + RunTimeExceptions: + @throws InvalidPathException If src is invalid]]> + + + + + + + + + + + + src is not found + @throws UnresolvedLinkException If src contains a symlink + @throws IOException If an I/O error occurred]]> + + + + + + + + + So, the NameNode will revoke the locks and live file-creates + for clients that it thinks have died. A client tells the + NameNode that it is still alive by periodically calling + renewLease(). If a certain amount of time passes since + the last call to renewLease(), the NameNode assumes the + client has died. + + @throws AccessControlException permission denied + @throws IOException If an I/O error occurred]]> + + + + + + + + + + + + + + +
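Pulling the create, rename and delete pieces above together, a minimal client-side sketch using the public FileSystem API, which is how applications normally reach these RPCs (paths and the written content are hypothetical):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FSDataOutputStream;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  final class CreateRenameDeleteSketch {
    public static void main(String[] args) throws Exception {
      FileSystem fs = FileSystem.get(new Configuration());
      Path part = new Path("/tmp/report.csv._tmp");
      Path done = new Path("/tmp/report.csv");

      // overwrite=false: fails if the file already exists, per the create() contract above.
      FSDataOutputStream out = fs.create(part, false);
      out.writeBytes("hello,hdfs\n");
      out.close();

      // rename returns false in the failure cases listed above, e.g. a missing parent of dst.
      boolean renamed = fs.rename(part, done);

      // recursive=false: refuses to remove a non-empty directory, as documented above.
      boolean deleted = fs.delete(done, false);
      System.out.println("renamed=" + renamed + " deleted=" + deleted);
    }
  }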

  • [0] contains the total storage capacity of the system, in bytes.
  • +
  • [1] contains the total used space of the system, in bytes.
  • +
  • [2] contains the available storage of the system, in bytes.
  • +
  • [3] contains number of under replicated blocks in the system.
  • +
  • [4] contains number of blocks with a corrupt replica.
  • +
  • [5] contains number of blocks without any good replicas left.
  • + + Use public constants like {@link #GET_STATS_CAPACITY_IDX} in place of + actual numbers to index into the array.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + Safe mode is a name node state when it +
    1. does not accept changes to name space (read-only), and
    2. +
    3. does not replicate or delete blocks.
    + +

    + Safe mode is entered automatically at name node startup. + Safe mode can also be entered manually using + {@link #setSafeMode(FSConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_GET)}. +

+ At startup the name node accepts data node reports collecting + information about block locations. + In order to leave safe mode it needs to collect a configurable + percentage, called the threshold, of blocks that satisfy the minimal + replication condition. + The minimal replication condition is that each block must have at least + dfs.namenode.replication.min replicas. + When the threshold is reached the name node extends safe mode + for a configurable amount of time + to let the remaining data nodes check in before it + starts replicating missing blocks. + Then the name node leaves safe mode. +

    + If safe mode is turned on manually using + {@link #setSafeMode(FSConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_ENTER)} + then the name node stays in safe mode until it is manually turned off + using {@link #setSafeMode(FSConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_LEAVE)}. + Current state of the name node can be verified using + {@link #setSafeMode(FSConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_GET)} +

    Configuration parameters:

    + dfs.safemode.threshold.pct is the threshold parameter.
    + dfs.safemode.extension is the safe mode extension parameter.
    + dfs.namenode.replication.min is the minimal replication parameter. + +

    Special cases:

    + The name node does not enter safe mode at startup if the threshold is + set to 0 or if the name space is empty.
    + If the threshold is set to 1 then all blocks need to have at least + minimal replication.
    + If the threshold value is greater than 1 then the name node will not be + able to turn off safe mode automatically.
    + Safe mode can always be turned off manually. + + @param action
    • 0 leave safe mode;
    • +
    • 1 enter safe mode;
    • +
    • 2 get safe mode state.
    + @return
    • 0 if the safe mode is OFF or
    • +
    • 1 if the safe mode is ON.
    + + @throws IOException]]> +
    +
    + + + + + + Saves current namespace into storage directories and reset edits log. + Requires superuser privilege and safe mode. + + @throws AccessControlException if the superuser privilege is violated. + @throws IOException if image creation failed.]]> + + + + + + + + sets flag to enable restore of failed storage replicas + + @throws AccessControlException if the superuser privilege is violated.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + src is not found + @throws UnresolvedLinkException if the path contains a symlink. + @throws IOException If an I/O error occurred]]> + + + + + + + + + src contains a symlink + @throws IOException If an I/O error occurred]]> + + + + + + + + + + path is not found + @throws UnresolvedLinkException if path contains a symlink. + @throws IOException If an I/O error occurred]]> + + + + + + + + + + + +
    + + The quota can have three types of values : (1) 0 or more will set + the quota to that value, (2) {@link FSConstants#QUOTA_DONT_SET} implies + the quota will not be changed, and (3) {@link FSConstants#QUOTA_RESET} + implies the quota will be reset. Any other value is a runtime error. + + @throws AccessControlException permission denied + @throws FileNotFoundException file path is not found + @throws QuotaExceededException if the directory size + is greater than the given quota + @throws UnresolvedLinkException if the path contains a symlink. + @throws IOException If an I/O error occurred]]> +
    +
    + + + + + + + + + src is not found + @throws UnresolvedLinkException if src contains a symlink. + @throws IOException If an I/O error occurred]]> + + + + + + + + + + + + src is not found + @throws UnresolvedLinkException if src contains a symlink. + @throws IOException If an I/O error occurred]]> + + + + + + + + + + + + + + + + link already exists + @throws FileNotFoundException If parent of link does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of link is not a + directory. + @throws UnresolvedLinkException if link contains a symlink. + @throws IOException If an I/O error occurred]]> + + + + + + + + + path does not exist + @throws IOException If the given path does not refer to a symlink + or an I/O error occurred]]> + + + + + + + + + + + + + + + + + + + + + + + + + + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • The layout of how namenode or datanode stores information + on disk changes.
  • +
  • A new operation code is added to the editlog.
  • +
• Modifications such as the format of a record, or the content of a record + in the editlog or fsimage.
  • + +
    + How to update layout version:
+ When a change requires a new layout version, please add an entry into + {@link Feature} with a short enum name, the new layout version and a description + of the change. Please see {@link Feature} for further details. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + To add a new layout version: +
      +
• Define a new enum constant with a short enum name, the new layout version, + and a description of the added feature.
    • +
• When adding a layout version with an ancestor that is not the same as + its immediate predecessor, use the constructor where a specific ancestor + can be passed. +
    • +
    ]]> +
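As a concrete illustration of the points above, the sketch below shows what such an enum entry could look like. This is a hedged, self-contained example: the class name, the feature names, and the layout-version numbers are all invented for the sketch and are not taken from the real {@link Feature} enum.

  public class LayoutVersionSketch {
    enum Feature {
      EXISTING_FEATURE(-40, "A previously released on-disk layout change"),
      // New entry: short enum name, new (more negative) layout version,
      // and a description of the on-disk change it introduces.
      EXAMPLE_NEW_FEATURE(-41, "Example: new operation code added to the editlog");

      private final int layoutVersion;
      private final String description;

      Feature(int layoutVersion, String description) {
        this.layoutVersion = layoutVersion;
        this.description = description;
      }
    }

    public static void main(String[] args) {
      for (Feature f : Feature.values()) {
        System.out.println(f + " -> " + f.layoutVersion + ": " + f.description);
      }
    }
  }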
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + The message for the exception specifies the directory where the quota + was violated and the actual quotas. The specific message is generated in the + corresponding exception class: + DSQuotaExceededException or + NSQuotaExceededException]]> +
    +
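For illustration, the minimal client-side sketch below shows where those messages would surface. The path used, and the assumption that quotas have already been set on it, are hypothetical; only the exception classes named above come from the text.

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException;
  import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException;

  public class QuotaMessageDemo {
    public static void main(String[] args) throws Exception {
      FileSystem fs = FileSystem.get(new Configuration());
      try {
        // "/quota-limited/newfile" is a hypothetical path under a directory
        // that already has a name-space or disk-space quota configured.
        fs.create(new Path("/quota-limited/newfile")).close();
      } catch (NSQuotaExceededException e) {
        System.err.println("Namespace quota violated: " + e.getMessage());
      } catch (DSQuotaExceededException e) {
        System.err.println("Disk-space quota violated: " + e.getMessage());
      } finally {
        fs.close();
      }
    }
  }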
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The balancer is a tool that balances disk space usage on an HDFS cluster + when some datanodes become full or when new empty nodes join the cluster. + The tool is deployed as an application program that can be run by the + cluster administrator on a live HDFS cluster while applications + adding and deleting files. + +

    SYNOPSIS +

    + To start:
    +      bin/start-balancer.sh [-threshold ]
+      Example: bin/start-balancer.sh
    +                     start the balancer with a default threshold of 10%
+               bin/start-balancer.sh -threshold 5
    +                     start the balancer with a threshold of 5%
    + To stop:
+      bin/stop-balancer.sh
    + 
    + +

    DESCRIPTION +

The threshold parameter is a fraction in the range of (0%, 100%) with a + default value of 10%. The threshold sets a target for whether the cluster + is balanced. A cluster is balanced if, for each datanode, the utilization + of the node (ratio of used space at the node to total capacity of the node) + differs from the utilization of the cluster (ratio of used space in the cluster + to total capacity of the cluster) by no more than the threshold value. + The smaller the threshold, the more balanced a cluster will become. + It takes more time to run the balancer for small threshold values. + Also, for a very small threshold, the cluster may not be able to reach the + balanced state when applications write and delete files concurrently. + +
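The balance criterion above can be written out directly. The sketch below is illustrative only and is not the actual Balancer implementation; it simply restates the definition: a node is balanced when its utilization is within the threshold (in percentage points) of the cluster-wide utilization.

  public class BalanceCheck {
    static boolean isBalanced(long nodeUsed, long nodeCapacity,
                              long clusterUsed, long clusterCapacity,
                              double thresholdPercent) {
      double nodeUtil = 100.0 * nodeUsed / nodeCapacity;
      double clusterUtil = 100.0 * clusterUsed / clusterCapacity;
      return Math.abs(nodeUtil - clusterUtil) <= thresholdPercent;
    }

    public static void main(String[] args) {
      // A node at 62% utilization vs. a cluster at 55% is unbalanced for a
      // 5% threshold but balanced for a 10% threshold.
      System.out.println(isBalanced(62, 100, 55, 100, 5.0));   // false
      System.out.println(isBalanced(62, 100, 55, 100, 10.0));  // true
    }
  }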

The tool moves blocks from highly utilized datanodes to poorly + utilized datanodes iteratively. In each iteration a datanode moves or + receives no more than the lesser of 10 GB or the threshold fraction + of its capacity. Each iteration runs for no more than 20 minutes. + At the end of each iteration, the balancer obtains updated datanode + information from the namenode. + +

A configuration property that limits the balancer's use of bandwidth is + defined in the default configuration file: +

    + 
    +   dfs.balance.bandwidthPerSec
    +   1048576
    +   Specifies the maximum bandwidth that each datanode 
+ can utilize for balancing purposes, in terms of the number of bytes 
    + per second. 
    + 
    + 
    + +

This property determines the maximum speed at which a block will be + moved from one datanode to another. The default value is 1 MB/s. The higher + the bandwidth, the faster a cluster can reach the balanced state, + but at the cost of greater competition with application processes. If an + administrator changes the value of this property in the configuration + file, the change takes effect the next time HDFS is restarted. + +
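As a small, hedged illustration of how the property above is consumed, the sketch below reads it through the generic Configuration API and falls back to the documented default when it is unset. The class name is invented for the example; only the property name and the default value come from the text above.

  import org.apache.hadoop.conf.Configuration;

  public class BalancerBandwidthSketch {
    public static void main(String[] args) {
      Configuration conf = new Configuration();
      // Falls back to the documented default of 1048576 bytes/s (1 MB/s)
      // when the property is not set in the loaded configuration files.
      long bytesPerSec = conf.getLong("dfs.balance.bandwidthPerSec", 1048576L);
      System.out.println("Balancer bandwidth limit: " + bytesPerSec + " bytes/s");
    }
  }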

MONITORING BALANCER PROGRESS +

After the balancer is started, the name of an output file where the balancer's + progress will be recorded is printed on the screen. The administrator + can monitor the balancer by reading this output file. + The output shows the balancer's status iteration by iteration. For each + iteration it prints the starting time, the iteration number, the total + number of bytes moved in previous iterations, + the total number of bytes left to move for the cluster + to be balanced, and the number of bytes being moved in this + iteration. Normally "Bytes Already Moved" increases while "Bytes Left + To Move" decreases. + +

    Running multiple instances of the balancer in an HDFS cluster is + prohibited by the tool. + +

    The balancer automatically exits when any of the following five + conditions is satisfied: +

      +
    1. The cluster is balanced; +
    2. No block can be moved; +
    3. No block has been moved for five consecutive iterations; +
    4. An IOException occurs while communicating with the namenode; +
    5. Another balancer is running. +
    + +

Upon exit, a balancer returns an exit code and prints one of the + following messages to the output file, corresponding to the above exit + reasons: +

      +
    1. The cluster is balanced. Exiting +
    2. No block can be moved. Exiting... +
    3. No block has been moved for 3 iterations. Exiting... +
    4. Received an IO exception: failure reason. Exiting... +
    5. Another balancer is running. Exiting... +
    + +

    The administrator can interrupt the execution of the balancer at any + time by running the command "stop-balancer.sh" on the machine where the + balancer is running.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The block has at least one {@link ReplicaState#FINALIZED} replica, + and is not going to be modified.]]> + + + + + + It has been recently allocated for write or append.]]> + + + + + + When a file lease expires its last block may not be {@link #COMPLETE} + and needs to go through a recovery procedure, + which synchronizes the existing replicas contents.]]> + + + + + + The client reported that all bytes are written to data-nodes + with the given generation stamp and block length, but no + {@link ReplicaState#FINALIZED} + replicas has yet been reported by data-nodes themselves.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if exclusive locks are supported or + false otherwise. + @throws IOException + @see StorageDirectory#lock()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Local storage information is stored in a separate file VERSION. + It contains type of the node, + the storage layout version, the namespace id, and + the fs state creation time. +

    + Local storage can reside in multiple directories. + Each directory should contain the same VERSION file as the others. + During startup Hadoop servers (name-node and data-nodes) read their local + storage information from them. +

+ The servers hold a lock for each storage directory while they run so that + other nodes cannot start up sharing the same storage. + The locks are released when the servers stop (normally or abnormally).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Removes contents of the current directory and creates an empty directory. + + This does not fully format the storage directory. + It cannot write the version file, since it should be written last, after + all other storage-type-dependent files are written. + Derived storage is responsible for setting specific storage values and + writing the version file to disk. + + @throws IOException]]> + + + + + + + + + + +

  • node type
  • +
  • layout version
  • +
  • namespaceID
  • +
  • fs state creation time
  • +
• other fields specific to this node type
  • + + The version file is always written last during storage directory updates. + The existence of the version file indicates that all other files have + been successfully written in the storage directory, the storage is valid + and does not need to be recovered. + + @return the version file path]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Locking is not supported by all file systems. + E.g., NFS does not consistently support exclusive locks. + +

    If locking is supported we guarantee exculsive access to the + storage directory. Otherwise, no guarantee is given. + + @throws IOException if locking fails]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Assigned to the file system at formatting and never changes after that. + Shared by all file system components.]]> + + + + + + Modified during upgrades.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + stream of bytes (of BLOCK_SIZE or less) + + This info is stored on a local disk. The DataNode + reports the table's contents to the NameNode upon startup + and every so often afterwards. + + DataNodes spend their lives in an endless loop of asking + the NameNode for something to do. A NameNode cannot connect + to a DataNode directly; a NameNode simply returns values from + functions invoked by a DataNode. + + DataNodes maintain an open server socket so that client code + or other DataNodes can read/write data. The host/port for + this server is reported to the NameNode, which then sends that + information to clients or other DataNodes that might be interested.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @see Storage]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • If the block file is missing, delete the block from volumeMap
  • +
  • If the block file exists and the block is missing in volumeMap, + add the block to volumeMap
  • +
• If the generation stamp does not match, then update the block with the right + generation stamp
  • +
  • If the block length in memory does not match the actual block file length + then mark the block as corrupt and update the block length in memory
  • +
  • If the file in {@link ReplicaInfo} does not match the file on + the disk, update {@link ReplicaInfo} with the correct file
  • + + + @param blockId Block that differs + @param diskFile Block file on the disk + @param diskMetaFile Metadata file from on the disk + @param vol Volume of the block file]]> +
    +
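A hedged sketch of the decision structure listed above is shown below. It is not the real FsDataset code: the Replica class and volumeMap here are simplified stand-ins for the actual replica bookkeeping, and the path used in main is hypothetical.

  import java.io.File;
  import java.util.HashMap;
  import java.util.Map;

  public class ReconcileSketch {
    // Simplified stand-in for the replica record kept in volumeMap.
    static class Replica {
      File file;
      long genStamp;
      long length;
    }

    static final Map<Long, Replica> volumeMap = new HashMap<>();

    // Mirrors the reconciliation rules listed above (illustrative only).
    static void checkAndUpdate(long blockId, File diskFile, long diskGenStamp) {
      Replica memReplica = volumeMap.get(blockId);
      if (diskFile == null || !diskFile.exists()) {
        volumeMap.remove(blockId);            // block file missing: drop from map
        return;
      }
      if (memReplica == null) {
        memReplica = new Replica();           // on disk but missing in the map: add it
        memReplica.file = diskFile;
        volumeMap.put(blockId, memReplica);
      }
      if (memReplica.genStamp != diskGenStamp) {
        memReplica.genStamp = diskGenStamp;   // fix the generation stamp
      }
      if (memReplica.length != diskFile.length()) {
        memReplica.length = diskFile.length(); // treat as corrupt; fix length in memory
      }
      if (!diskFile.equals(memReplica.file)) {
        memReplica.file = diskFile;           // point the record at the actual file
      }
    }

    public static void main(String[] args) {
      checkAndUpdate(1L, new File("/tmp/blk_1"), 1001L); // hypothetical path
      System.out.println("replicas tracked: " + volumeMap.size());
    }
  }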
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + -" + + Many of the activity metrics are sampled and averaged on an interval + which can be specified in the metrics config file. +

+ For the metrics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most metrics contexts do. + The default Null metrics context, however, does NOT. So if you aren't + using any other metrics context, you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-metrics.properties file: +

    +        dfs.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        dfs.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically + + + + Impl details: We use a dynamic mbean that gets the list of the metrics + from the metrics registry passed as an argument to the constructor]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #blocksRead}.inc()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Data Node runtime statistic info is report in another MBean + @see org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeActivityMBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Backup node can play two roles. +

      +
1. {@link NamenodeRole#CHECKPOINT} node periodically creates checkpoints, + that is, it downloads the image and edits from the active node, merges them, and + uploads the new image back to the active node.
+
2. {@link NamenodeRole#BACKUP} node keeps its namespace in sync with the + active node, and periodically creates checkpoints by simply saving the + namespace image to local disk(s).
+
    ]]> +
    +
    + + + + + + + + + + + + + + + + + blocklist) and (block-->machinelist) tables.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + TreeSet]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + :/data[/] HTTP/1.1 + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + =0.16)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The purpose of registration is to identify whether the new datanode + serves a new data storage, and will report new data block copies, + which the namenode was not aware of; or the datanode is a replacement + node for the data storage that was previously served by a different + or the same (in terms of host:port) datanode. + The data storages are distinguished by their storageIDs. When a new + data storage is reported the namenode issues a new unique storageID. +

    + Finally, the namenode returns its namespaceID as the registrationID + for the datanodes. + namespaceID is a persistent attribute of the name space. + The registrationID is checked every time the datanode is communicating + with the namenode. + Datanodes with inappropriate registrationID are rejected. + If the namenode stops, and then restarts it can restore its + namespaceID and will continue serving the datanodes that has previously + registered with the namenode without restarting the whole cluster. + + @see org.apache.hadoop.hdfs.server.datanode.DataNode#register()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + blocklist) and (block-->machinelist) tables.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + no further work needed here. + 2. Removed from hosts --> mark AdminState as decommissioned. + 3. Added to exclude --> start decommission. + 4. Removed from exclude --> stop decommission.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key=value pairs to be written for the following properties: + + ugi=<ugi in RPC> + ip=<remote IP> + cmd=<command> + src=<src path> + dst=<dst path (optional)> + perm=<permissions (optional)> + ]]> + + + + + + + + + + + + blocklist (kept on disk, logged) + 2) Set of all valid blocks (inverted #1) + 3) block --> machinelist (kept in memory, rebuilt dynamically from reports) + 4) machine --> blocklist (inverted #2) + 5) LRU cache of updated-heartbeat machines]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + :/listPaths[/][[&option]*] HTTP/1.1 + } + + Where option (default) in: + recursive ("no") + filter (".*") + exclude ("\..*\.crc") + + Response: A flat list of files/directories in the following format: + {@code +

    + + + + }]]> +
    +
    + + + + + +
    + + + + + + + + + + The name-node can be started with one of the following startup options: +
      +
    • {@link StartupOption#REGULAR REGULAR} - normal name node startup
    • +
    • {@link StartupOption#FORMAT FORMAT} - format name node
    • +
    • {@link StartupOption#BACKUP BACKUP} - start backup node
    • +
    • {@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node
    • +
    • {@link StartupOption#UPGRADE UPGRADE} - start the cluster + upgrade and create a snapshot of the current file system state
    • +
    • {@link StartupOption#ROLLBACK ROLLBACK} - roll the + cluster back to the previous state
    • +
    • {@link StartupOption#FINALIZE FINALIZE} - finalize + previous upgrade
    • +
    • {@link StartupOption#IMPORT IMPORT} - import checkpoint
    • +
+ The option is passed via the configuration field: + dfs.namenode.startup + + The conf will be modified to reflect the actual ports on which + the NameNode is up and running if the user passes the port as + zero in the conf. + + @param conf configuration + @throws IOException]]> +
    +
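The note above says the chosen option ends up in the dfs.namenode.startup configuration field. The sketch below only demonstrates setting and reading that field through the Configuration API; the value spelling ("FORMAT") is an assumption of the sketch, and in practice the option is normally supplied on the command line and translated into this field by the NameNode startup code.

  import org.apache.hadoop.conf.Configuration;

  public class StartupOptionSketch {
    public static void main(String[] args) {
      Configuration conf = new Configuration();
      // Value format is an assumption; defaults to regular startup when unset.
      conf.set("dfs.namenode.startup", "FORMAT");
      System.out.println("Startup option: " + conf.get("dfs.namenode.startup", "REGULAR"));
    }
  }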
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + blocksequence (namespace) + 2) block->machinelist ("inodes") + + The first table is stored on disk and is very precious. + The second table is rebuilt every time the NameNode comes + up. + + 'NameNode' refers to both this class as well as the 'NameNode server'. + The 'FSNamesystem' class actually performs most of the filesystem + management. The majority of the 'NameNode' class itself is concerned + with exposing the IPC interface and the http server to the outside world, + plus some configuration management. + + NameNode implements the ClientProtocol interface, which allows + clients to ask for DFS services. ClientProtocol is not + designed for direct use by authors of DFS client code. End-users + should instead use the org.apache.nutch.hadoop.fs.FileSystem class. + + NameNode also implements the DatanodeProtocol interface, used by + DataNode programs that actually store DFS data blocks. These + methods are invoked repeatedly and automatically by all the + DataNodes in a DFS deployment. + + NameNode also implements the NamenodeProtocol interface, used by + secondary namenodes or rebalancing processes to get partial namenode's + state, for example partial blocksMap etc.]]> + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The tool scans all files and directories, starting from an indicated + root path. The following abnormal conditions are detected and handled:

    +
      +
    • files with blocks that are completely missing from all datanodes.
      + In this case the tool can perform one of the following actions: +
        +
      • none ({@link #FIXING_NONE})
      • +
• move corrupted files to the /lost+found directory on DFS + ({@link #FIXING_MOVE}). Remaining data blocks are saved as + block chains, representing the longest consecutive series of valid blocks.
      • +
      • delete corrupted files ({@link #FIXING_DELETE})
      • +
      +
    • +
    • detect files with under-replicated or over-replicated blocks
    • +
+ Additionally, the tool collects detailed overall DFS statistics and + can optionally print detailed statistics on block locations and replication + factors of each file.]]> +
    +
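For illustration, the hedged sketch below drives the fsck tool programmatically through ToolRunner, asking it to check a (hypothetical) /user tree and move corrupted files to /lost+found as described by FIXING_MOVE above. Treat the constructor and flag spelling as assumptions of the sketch rather than a definitive invocation; the usual way to run the tool is from the command line.

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hdfs.tools.DFSck;
  import org.apache.hadoop.util.ToolRunner;

  public class FsckSketch {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      // Check everything under /user and move corrupted files to /lost+found.
      int rc = ToolRunner.run(conf, new DFSck(conf), new String[] {"/user", "-move"});
      System.exit(rc);
    }
  }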
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Name Node runtime activity statistic info is report in another MBean + @see org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeActivityMBean]]> + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

{@link #filesTotal}.set()]]> + + + + + + + + + + + + For the metrics that are sampled and averaged, one must specify + a metrics context that does periodic update calls. Most metrics contexts do. + The default Null metrics context, however, does NOT. So if you aren't + using any other metrics context, you can turn on the viewing and averaging + of sampled metrics by specifying the following two lines + in the hadoop-metrics.properties file: +

    +        dfs.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    +        dfs.period=10
    +  
    +

    + Note that the metrics are collected regardless of the context used. + The context with the update thread is used to average the data periodically + + + + Impl details: We use a dynamic mbean that gets the list of the metrics + from the metrics registry passed as an argument to the constructor]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class has a number of metrics variables that are publicly accessible; + these variables (objects) have methods to update their values; + for example: +

    {@link #syncs}.inc()]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Returned to the backup node by the name-node as a reply to the + {@link NamenodeProtocol#startCheckpoint(NamenodeRegistration)} + request.
    + Contains: +

      +
    • {@link CheckpointSignature} identifying the particular checkpoint
    • +
• an indicator of whether the backup image should be discarded before starting + the checkpoint
    • +
• an indicator of whether the image should be transferred back to the name-node + upon completion of the checkpoint.
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + datanode + whose total size equals size. + + @see org.apache.hadoop.hdfs.server.balancer.Balancer + @param datanode a data node + @param size requested size + @return a list of blocks & their locations + @throws RemoteException if size is less than or equal to 0 or + datanode does not exist]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The tool scans all files and directories, starting from an indicated + root path. The following abnormal conditions are detected and handled:

    +
      +
    • files with blocks that are completely missing from all datanodes.
      + In this case the tool can perform one of the following actions: +
        +
      • none ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_NONE})
      • +
• move corrupted files to the /lost+found directory on DFS + ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_MOVE}). Remaining data blocks are saved as + block chains, representing the longest consecutive series of valid blocks.
      • +
      • delete corrupted files ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_DELETE})
      • +
      +
    • +
    • detect files with under-replicated or over-replicated blocks
    • +
+ Additionally, the tool collects detailed overall DFS statistics and + can optionally print detailed statistics on block locations and replication + factors of each file. + The tool also provides an option to filter open files during the scan.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + It analyzes file names in fsimage and prints the following information: +
  • Number of unique file names
  • +
• Number of file names and the corresponding number of files that use + these same names
  • +
  • Heap saved if the file name objects are reused
  • ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The type of the keys. + @param The type of the elements, which must be a subclass of the keys.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Key type for looking up the elements + @param Element type, which must be + (1) a subclass of K, and + (2) implementing {@link LinkedElement} interface.]]> + + + + + + + + + + + + + + + + + + + + + + + +
    diff --git a/aarch64/share/hadoop/hdfs/lib/asm-3.2.jar b/aarch64/share/hadoop/hdfs/lib/asm-3.2.jar new file mode 100644 index 0000000..ca9f8d2 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/asm-3.2.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/commons-cli-1.2.jar b/aarch64/share/hadoop/hdfs/lib/commons-cli-1.2.jar new file mode 100644 index 0000000..ce4b9ff Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/commons-cli-1.2.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/commons-codec-1.4.jar b/aarch64/share/hadoop/hdfs/lib/commons-codec-1.4.jar new file mode 100644 index 0000000..458d432 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/commons-codec-1.4.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/commons-daemon-1.0.13.jar b/aarch64/share/hadoop/hdfs/lib/commons-daemon-1.0.13.jar new file mode 100644 index 0000000..ac77321 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/commons-daemon-1.0.13.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/commons-el-1.0.jar b/aarch64/share/hadoop/hdfs/lib/commons-el-1.0.jar new file mode 100644 index 0000000..608ed79 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/commons-el-1.0.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/commons-io-2.1.jar b/aarch64/share/hadoop/hdfs/lib/commons-io-2.1.jar new file mode 100644 index 0000000..b5c7d69 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/commons-io-2.1.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/commons-lang-2.5.jar b/aarch64/share/hadoop/hdfs/lib/commons-lang-2.5.jar new file mode 100644 index 0000000..ae491da Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/commons-lang-2.5.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/commons-logging-1.1.1.jar b/aarch64/share/hadoop/hdfs/lib/commons-logging-1.1.1.jar new file mode 100644 index 0000000..1deef14 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/commons-logging-1.1.1.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/guava-11.0.2.jar b/aarch64/share/hadoop/hdfs/lib/guava-11.0.2.jar new file mode 100644 index 0000000..c8c8d5d Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/guava-11.0.2.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/jackson-core-asl-1.8.8.jar b/aarch64/share/hadoop/hdfs/lib/jackson-core-asl-1.8.8.jar new file mode 100644 index 0000000..05f3353 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/jackson-core-asl-1.8.8.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/jackson-mapper-asl-1.8.8.jar b/aarch64/share/hadoop/hdfs/lib/jackson-mapper-asl-1.8.8.jar new file mode 100644 index 0000000..7c7cd21 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/jackson-mapper-asl-1.8.8.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/jasper-runtime-5.5.23.jar b/aarch64/share/hadoop/hdfs/lib/jasper-runtime-5.5.23.jar new file mode 100644 index 0000000..a3208c9 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/jasper-runtime-5.5.23.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/jersey-core-1.9.jar b/aarch64/share/hadoop/hdfs/lib/jersey-core-1.9.jar new file mode 100644 index 0000000..548dd88 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/jersey-core-1.9.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/jersey-server-1.9.jar b/aarch64/share/hadoop/hdfs/lib/jersey-server-1.9.jar new file mode 100644 index 0000000..ae0117c Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/jersey-server-1.9.jar differ diff --git 
a/aarch64/share/hadoop/hdfs/lib/jetty-6.1.26.jar b/aarch64/share/hadoop/hdfs/lib/jetty-6.1.26.jar new file mode 100644 index 0000000..2cbe07a Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/jetty-6.1.26.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/jetty-util-6.1.26.jar b/aarch64/share/hadoop/hdfs/lib/jetty-util-6.1.26.jar new file mode 100644 index 0000000..cd23752 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/jetty-util-6.1.26.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/jsp-api-2.1.jar b/aarch64/share/hadoop/hdfs/lib/jsp-api-2.1.jar new file mode 100644 index 0000000..c0195af Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/jsp-api-2.1.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/jsr305-1.3.9.jar b/aarch64/share/hadoop/hdfs/lib/jsr305-1.3.9.jar new file mode 100644 index 0000000..a9afc66 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/jsr305-1.3.9.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/log4j-1.2.17.jar b/aarch64/share/hadoop/hdfs/lib/log4j-1.2.17.jar new file mode 100644 index 0000000..1d425cf Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/log4j-1.2.17.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/netty-3.6.2.Final.jar b/aarch64/share/hadoop/hdfs/lib/netty-3.6.2.Final.jar new file mode 100644 index 0000000..a421e28 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/netty-3.6.2.Final.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/protobuf-java-2.5.0.jar b/aarch64/share/hadoop/hdfs/lib/protobuf-java-2.5.0.jar new file mode 100644 index 0000000..4c4e686 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/protobuf-java-2.5.0.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/servlet-api-2.5.jar b/aarch64/share/hadoop/hdfs/lib/servlet-api-2.5.jar new file mode 100644 index 0000000..fb52493 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/servlet-api-2.5.jar differ diff --git a/aarch64/share/hadoop/hdfs/lib/xmlenc-0.52.jar b/aarch64/share/hadoop/hdfs/lib/xmlenc-0.52.jar new file mode 100644 index 0000000..ec568b4 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/lib/xmlenc-0.52.jar differ diff --git a/aarch64/share/hadoop/hdfs/sources/hadoop-hdfs-2.2.0-sources.jar b/aarch64/share/hadoop/hdfs/sources/hadoop-hdfs-2.2.0-sources.jar new file mode 100644 index 0000000..db00ba0 Binary files /dev/null and b/aarch64/share/hadoop/hdfs/sources/hadoop-hdfs-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/hdfs/sources/hadoop-hdfs-2.2.0-test-sources.jar b/aarch64/share/hadoop/hdfs/sources/hadoop-hdfs-2.2.0-test-sources.jar new file mode 100644 index 0000000..601db3a Binary files /dev/null and b/aarch64/share/hadoop/hdfs/sources/hadoop-hdfs-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/hdfs/templates/hdfs-site.xml b/aarch64/share/hadoop/hdfs/templates/hdfs-site.xml new file mode 100644 index 0000000..50ec146 --- /dev/null +++ b/aarch64/share/hadoop/hdfs/templates/hdfs-site.xml @@ -0,0 +1,21 @@ + + + + + + + + + diff --git a/aarch64/share/hadoop/hdfs/webapps/datanode/WEB-INF/web.xml b/aarch64/share/hadoop/hdfs/webapps/datanode/WEB-INF/web.xml new file mode 100644 index 0000000..5e560ca --- /dev/null +++ b/aarch64/share/hadoop/hdfs/webapps/datanode/WEB-INF/web.xml @@ -0,0 +1,59 @@ + + + + + + + + org.apache.hadoop.hdfs.server.datanode.tail_jsp + org.apache.hadoop.hdfs.server.datanode.tail_jsp + + + + org.apache.hadoop.hdfs.server.datanode.browseDirectory_jsp + org.apache.hadoop.hdfs.server.datanode.browseDirectory_jsp + + + + 
org.apache.hadoop.hdfs.server.datanode.browseBlock_jsp + org.apache.hadoop.hdfs.server.datanode.browseBlock_jsp + + + + org.apache.hadoop.hdfs.server.datanode.tail_jsp + /tail.jsp + + + + org.apache.hadoop.hdfs.server.datanode.browseDirectory_jsp + /browseDirectory.jsp + + + + org.apache.hadoop.hdfs.server.datanode.browseBlock_jsp + /browseBlock.jsp + + + + + diff --git a/aarch64/share/hadoop/hdfs/webapps/datanode/robots.txt b/aarch64/share/hadoop/hdfs/webapps/datanode/robots.txt new file mode 100644 index 0000000..1f53798 --- /dev/null +++ b/aarch64/share/hadoop/hdfs/webapps/datanode/robots.txt @@ -0,0 +1,2 @@ +User-agent: * +Disallow: / diff --git a/aarch64/share/hadoop/hdfs/webapps/hdfs/WEB-INF/web.xml b/aarch64/share/hadoop/hdfs/webapps/hdfs/WEB-INF/web.xml new file mode 100644 index 0000000..aec1197 --- /dev/null +++ b/aarch64/share/hadoop/hdfs/webapps/hdfs/WEB-INF/web.xml @@ -0,0 +1,109 @@ + + + + + + + + org.apache.hadoop.hdfs.server.namenode.block_005finfo_005fxml_jsp + org.apache.hadoop.hdfs.server.namenode.block_005finfo_005fxml_jsp + + + + org.apache.hadoop.hdfs.server.namenode.dfsnodelist_jsp + org.apache.hadoop.hdfs.server.namenode.dfsnodelist_jsp + + + + org.apache.hadoop.hdfs.server.namenode.dfsclusterhealth_jsp + org.apache.hadoop.hdfs.server.namenode.dfsclusterhealth_jsp + + + + org.apache.hadoop.hdfs.server.namenode.dfshealth_jsp + org.apache.hadoop.hdfs.server.namenode.dfshealth_jsp + + + + org.apache.hadoop.hdfs.server.namenode.decommission_jsp + org.apache.hadoop.hdfs.server.namenode.decommission_jsp + + + + org.apache.hadoop.hdfs.server.namenode.nn_005fbrowsedfscontent_jsp + org.apache.hadoop.hdfs.server.namenode.nn_005fbrowsedfscontent_jsp + + + + org.apache.hadoop.hdfs.server.namenode.corrupt_005ffiles_jsp + org.apache.hadoop.hdfs.server.namenode.corrupt_005ffiles_jsp + + + + org.apache.hadoop.hdfs.server.namenode.corrupt_005freplicas_005fxml_jsp + org.apache.hadoop.hdfs.server.namenode.corrupt_005freplicas_005fxml_jsp + + + + org.apache.hadoop.hdfs.server.namenode.block_005finfo_005fxml_jsp + /block_info_xml.jsp + + + + org.apache.hadoop.hdfs.server.namenode.dfsnodelist_jsp + /dfsnodelist.jsp + + + + org.apache.hadoop.hdfs.server.namenode.dfsclusterhealth_jsp + /dfsclusterhealth.jsp + + + + org.apache.hadoop.hdfs.server.namenode.dfshealth_jsp + /dfshealth.jsp + + + + org.apache.hadoop.hdfs.server.namenode.decommission_jsp + /decommission.jsp + + + + org.apache.hadoop.hdfs.server.namenode.nn_005fbrowsedfscontent_jsp + /nn_browsedfscontent.jsp + + + + org.apache.hadoop.hdfs.server.namenode.corrupt_005ffiles_jsp + /corrupt_files.jsp + + + + org.apache.hadoop.hdfs.server.namenode.corrupt_005freplicas_005fxml_jsp + /corrupt_replicas_xml.jsp + + + + + diff --git a/aarch64/share/hadoop/hdfs/webapps/hdfs/decommission.xsl b/aarch64/share/hadoop/hdfs/webapps/hdfs/decommission.xsl new file mode 100644 index 0000000..dba2a07 --- /dev/null +++ b/aarch64/share/hadoop/hdfs/webapps/hdfs/decommission.xsl @@ -0,0 +1,139 @@ + + + + + + + + + + + + + + Hadoop cluster + <xsl:value-of select="cluster/@clusterId" /> + + + + +

    + Cluster ' + + ' +

    + +

    Decommissioning Status

    + + +
    + + + + + + + + + + +
    + + : + +
    +
    + +
    +
    + + + +
    + + + + + + + + + + + + + + + +
    + +
    + + + + + + + + + + + + + +
    +
    + +
    + + +

    Unreported Namenodes

    +
    + + + + + + + + + +
    + + + +
    +
    +
    + + +

    Exception

    + + + +
    + + + +
    +
    + diff --git a/aarch64/share/hadoop/hdfs/webapps/hdfs/dfsclusterhealth.xsl b/aarch64/share/hadoop/hdfs/webapps/hdfs/dfsclusterhealth.xsl new file mode 100644 index 0000000..77a15cd --- /dev/null +++ b/aarch64/share/hadoop/hdfs/webapps/hdfs/dfsclusterhealth.xsl @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + Hadoop cluster + <xsl:value-of select="cluster/@clusterId" /> + + + + +

    + Cluster ' + + ' +

    + +

    Cluster Summary

    + +
    + + + + + + + + + + +
    + + : + + + + + + + + + + + + + +
    +
    + +
    +
    +
    + +

    Namenodes

    + +
    + + + + + + + + +
    Number of namenodes: + +
    +
    + +
    + +
    + + + + + + + + + + + + + + + + + + +
    + +
    + + + + + + + + + + + + + +
    +
    +
    + + +

    Unreported Namenodes

    +
    + + + + + + + + + +
    + + + +
    +
    +
    + + +

    Exception

    + + + +
    + + + +
    +
    + diff --git a/aarch64/share/hadoop/hdfs/webapps/hdfs/dfsclusterhealth_utils.xsl b/aarch64/share/hadoop/hdfs/webapps/hdfs/dfsclusterhealth_utils.xsl new file mode 100644 index 0000000..8c89b42 --- /dev/null +++ b/aarch64/share/hadoop/hdfs/webapps/hdfs/dfsclusterhealth_utils.xsl @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + b + kb + Mb + Gb + + Tb + Pb + b + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/aarch64/share/hadoop/hdfs/webapps/hdfs/index.html b/aarch64/share/hadoop/hdfs/webapps/hdfs/index.html new file mode 100644 index 0000000..648da4a --- /dev/null +++ b/aarch64/share/hadoop/hdfs/webapps/hdfs/index.html @@ -0,0 +1,35 @@ + + + + +Hadoop Administration + + + + +

    Hadoop Administration

    + + + + + + diff --git a/aarch64/share/hadoop/hdfs/webapps/journal/WEB-INF/web.xml b/aarch64/share/hadoop/hdfs/webapps/journal/WEB-INF/web.xml new file mode 100644 index 0000000..526806f --- /dev/null +++ b/aarch64/share/hadoop/hdfs/webapps/journal/WEB-INF/web.xml @@ -0,0 +1,39 @@ + + + + + + + + org.apache.hadoop.hdfs.server.journalservice.journalstatus_jsp + org.apache.hadoop.hdfs.server.journalservice.journalstatus_jsp + + + + org.apache.hadoop.hdfs.server.journalservice.journalstatus_jsp + /journalstatus.jsp + + + + + diff --git a/aarch64/share/hadoop/hdfs/webapps/journal/index.html b/aarch64/share/hadoop/hdfs/webapps/journal/index.html new file mode 100644 index 0000000..bc9ea42 --- /dev/null +++ b/aarch64/share/hadoop/hdfs/webapps/journal/index.html @@ -0,0 +1,29 @@ + + + +Hadoop Administration + + +

    Hadoop Administration

    + + + + + diff --git a/aarch64/share/hadoop/hdfs/webapps/secondary/WEB-INF/web.xml b/aarch64/share/hadoop/hdfs/webapps/secondary/WEB-INF/web.xml new file mode 100644 index 0000000..f35c20a --- /dev/null +++ b/aarch64/share/hadoop/hdfs/webapps/secondary/WEB-INF/web.xml @@ -0,0 +1,39 @@ + + + + + + + + org.apache.hadoop.hdfs.server.namenode.status_jsp + org.apache.hadoop.hdfs.server.namenode.status_jsp + + + + org.apache.hadoop.hdfs.server.namenode.status_jsp + /status.jsp + + + + + diff --git a/aarch64/share/hadoop/hdfs/webapps/secondary/index.html b/aarch64/share/hadoop/hdfs/webapps/secondary/index.html new file mode 100644 index 0000000..988f03d --- /dev/null +++ b/aarch64/share/hadoop/hdfs/webapps/secondary/index.html @@ -0,0 +1,29 @@ + + + +Hadoop Administration + + +

    Hadoop Administration

    + + + + + diff --git a/aarch64/share/hadoop/hdfs/webapps/static/hadoop.css b/aarch64/share/hadoop/hdfs/webapps/static/hadoop.css new file mode 100644 index 0000000..9b031c7 --- /dev/null +++ b/aarch64/share/hadoop/hdfs/webapps/static/hadoop.css @@ -0,0 +1,190 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +body { + background-color : #ffffff; + font-family : sans-serif; +} + +.small { + font-size : smaller; +} + +div#dfsnodetable tr#row1, div.dfstable td.col1 { + font-weight : bolder; +} + +div.dfstable th { + text-align:left; + vertical-align : top; +} + +div.dfstable td#col3 { + text-align : right; +} + +div#dfsnodetable caption { + text-align : left; +} + +div#dfsnodetable a#title { + font-size : larger; + font-weight : bolder; +} + +div#dfsnodetable td, th { + padding-bottom : 4px; + padding-top : 4px; +} + +div#dfsnodetable A:link, A:visited { + text-decoration : none; +} + +div#dfsnodetable th.header, th.headerASC, th.headerDSC { + padding-bottom : 8px; + padding-top : 8px; +} +div#dfsnodetable th.header:hover, th.headerASC:hover, th.headerDSC:hover, + td.name:hover { + text-decoration : underline; + cursor : pointer; +} + +div#dfsnodetable td.blocks, td.size, td.pcused, td.adminstate, td.lastcontact { + text-align : right; +} + +div#dfsnodetable .rowNormal .header { + background-color : #ffffff; +} +div#dfsnodetable .rowAlt, .headerASC, .headerDSC { + background-color : lightyellow; +} + +.warning { + font-weight : bolder; + color : red; +} + +div.dfstable table { + white-space : pre; +} + +table.storage, table.nodes { + border-collapse: collapse; +} + +table.storage td { + padding:10px; + border:1px solid black; +} + +table.nodes td { + padding:0px; + border:1px solid black; +} + +div#dfsnodetable td, div#dfsnodetable th, div.dfstable td { + padding-left : 10px; + padding-right : 10px; + border:1px solid black; +} + +td.perc_filled { + background-color:#AAAAFF; +} + +td.perc_nonfilled { + background-color:#FFFFFF; +} + +line.taskgraphline { + stroke-width:1;stroke-linecap:round; +} + +#quicklinks { + margin: 0; + padding: 2px 4px; + position: fixed; + top: 0; + right: 0; + text-align: right; + background-color: #eee; + font-weight: bold; +} + +#quicklinks ul { + margin: 0; + padding: 0; + list-style-type: none; + font-weight: normal; +} + +#quicklinks ul { + display: none; +} + +#quicklinks a { + font-size: smaller; + text-decoration: none; +} + +#quicklinks ul a { + text-decoration: underline; +} + +span.failed { + color:red; +} + +div.security { + width:100%; +} + +#startupprogress table, #startupprogress th, #startupprogress td { + border-collapse: collapse; + border-left: 1px solid black; + border-right: 1px solid black; + padding: 5px; + text-align: left; +} + +#startupprogress table { + border: 1px solid black; +} + +.phase { + 
border-top: 1px solid black; + font-weight: bold; +} + +.current { + font-style: italic; +} + +.later { + color: gray; +} + +.step .startupdesc { + text-indent: 20px; +} + +#startupprogress span { + font-weight: bold; +} diff --git a/aarch64/share/hadoop/httpfs/tomcat/LICENSE b/aarch64/share/hadoop/httpfs/tomcat/LICENSE new file mode 100644 index 0000000..51c34b6 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/LICENSE @@ -0,0 +1,707 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + +APACHE TOMCAT SUBCOMPONENTS: + +Apache Tomcat includes a number of subcomponents with separate copyright notices +and license terms. Your use of these subcomponents is subject to the terms and +conditions of the following licenses. + + +For the Eclipse JDT Java compiler: + +Eclipse Public License - v 1.0 + +THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC +LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM +CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. DEFINITIONS + +"Contribution" means: + +a) in the case of the initial Contributor, the initial code and documentation +distributed under this Agreement, and + +b) in the case of each subsequent Contributor: + +i) changes to the Program, and + +ii) additions to the Program; + +where such changes and/or additions to the Program originate from and are +distributed by that particular Contributor. A Contribution 'originates' from a +Contributor if it was added to the Program by such Contributor itself or anyone +acting on such Contributor's behalf. Contributions do not include additions to +the Program which: (i) are separate modules of software distributed in +conjunction with the Program under their own license agreement, and (ii) are not +derivative works of the Program. + +"Contributor" means any person or entity that distributes the Program. + +"Licensed Patents" mean patent claims licensable by a Contributor which are +necessarily infringed by the use or sale of its Contribution alone or when +combined with the Program. + +"Program" means the Contributions distributed in accordance with this Agreement. + +"Recipient" means anyone who receives the Program under this Agreement, +including all Contributors. + +2. GRANT OF RIGHTS + +a) Subject to the terms of this Agreement, each Contributor hereby grants +Recipient a non-exclusive, worldwide, royalty-free copyright license to +reproduce, prepare derivative works of, publicly display, publicly perform, +distribute and sublicense the Contribution of such Contributor, if any, and such +derivative works, in source code and object code form. + +b) Subject to the terms of this Agreement, each Contributor hereby grants +Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed +Patents to make, use, sell, offer to sell, import and otherwise transfer the +Contribution of such Contributor, if any, in source code and object code form. +This patent license shall apply to the combination of the Contribution and the +Program if, at the time the Contribution is added by the Contributor, such +addition of the Contribution causes such combination to be covered by the +Licensed Patents. 
The patent license shall not apply to any other combinations +which include the Contribution. No hardware per se is licensed hereunder. + +c) Recipient understands that although each Contributor grants the licenses to +its Contributions set forth herein, no assurances are provided by any +Contributor that the Program does not infringe the patent or other intellectual +property rights of any other entity. Each Contributor disclaims any liability to +Recipient for claims brought by any other entity based on infringement of +intellectual property rights or otherwise. As a condition to exercising the +rights and licenses granted hereunder, each Recipient hereby assumes sole +responsibility to secure any other intellectual property rights needed, if any. +For example, if a third party patent license is required to allow Recipient to +distribute the Program, it is Recipient's responsibility to acquire that license +before distributing the Program. + +d) Each Contributor represents that to its knowledge it has sufficient copyright +rights in its Contribution, if any, to grant the copyright license set forth in +this Agreement. + +3. REQUIREMENTS + +A Contributor may choose to distribute the Program in object code form under its +own license agreement, provided that: + +a) it complies with the terms and conditions of this Agreement; and + +b) its license agreement: + +i) effectively disclaims on behalf of all Contributors all warranties and +conditions, express and implied, including warranties or conditions of title and +non-infringement, and implied warranties or conditions of merchantability and +fitness for a particular purpose; + +ii) effectively excludes on behalf of all Contributors all liability for +damages, including direct, indirect, special, incidental and consequential +damages, such as lost profits; + +iii) states that any provisions which differ from this Agreement are offered by +that Contributor alone and not by any other party; and + +iv) states that source code for the Program is available from such Contributor, +and informs licensees how to obtain it in a reasonable manner on or through a +medium customarily used for software exchange. + +When the Program is made available in source code form: + +a) it must be made available under this Agreement; and + +b) a copy of this Agreement must be included with each copy of the Program. + +Contributors may not remove or alter any copyright notices contained within the +Program. + +Each Contributor must identify itself as the originator of its Contribution, if +any, in a manner that reasonably allows subsequent Recipients to identify the +originator of the Contribution. + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain responsibilities with +respect to end users, business partners and the like. While this license is +intended to facilitate the commercial use of the Program, the Contributor who +includes the Program in a commercial product offering should do so in a manner +which does not create potential liability for other Contributors. 
Therefore, if +a Contributor includes the Program in a commercial product offering, such +Contributor ("Commercial Contributor") hereby agrees to defend and indemnify +every other Contributor ("Indemnified Contributor") against any losses, damages +and costs (collectively "Losses") arising from claims, lawsuits and other legal +actions brought by a third party against the Indemnified Contributor to the +extent caused by the acts or omissions of such Commercial Contributor in +connection with its distribution of the Program in a commercial product +offering. The obligations in this section do not apply to any claims or Losses +relating to any actual or alleged intellectual property infringement. In order +to qualify, an Indemnified Contributor must: a) promptly notify the Commercial +Contributor in writing of such claim, and b) allow the Commercial Contributor +to control, and cooperate with the Commercial Contributor in, the defense and +any related settlement negotiations. The Indemnified Contributor may +participate in any such claim at its own expense. + +For example, a Contributor might include the Program in a commercial product +offering, Product X. That Contributor is then a Commercial Contributor. If that +Commercial Contributor then makes performance claims, or offers warranties +related to Product X, those performance claims and warranties are such +Commercial Contributor's responsibility alone. Under this section, the +Commercial Contributor would have to defend claims against the other +Contributors related to those performance claims and warranties, and if a court +requires any other Contributor to pay any damages as a result, the Commercial +Contributor must pay those damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR +IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each +Recipient is solely responsible for determining the appropriateness of using and +distributing the Program and assumes all risks associated with its exercise of +rights under this Agreement , including but not limited to the risks and costs +of program errors, compliance with applicable laws, damage to or loss of data, +programs or equipment, and unavailability or interruption of operations. + +6. DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY +CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST +PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS +GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +7. GENERAL + +If any provision of this Agreement is invalid or unenforceable under applicable +law, it shall not affect the validity or enforceability of the remainder of the +terms of this Agreement, and without further action by the parties hereto, such +provision shall be reformed to the minimum extent necessary to make such +provision valid and enforceable. 
+ +If Recipient institutes patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Program itself +(excluding combinations of the Program with other software or hardware) +infringes such Recipient's patent(s), then such Recipient's rights granted under +Section 2(b) shall terminate as of the date such litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it fails to +comply with any of the material terms or conditions of this Agreement and does +not cure such failure in a reasonable period of time after becoming aware of +such noncompliance. If all Recipient's rights under this Agreement terminate, +Recipient agrees to cease use and distribution of the Program as soon as +reasonably practicable. However, Recipient's obligations under this Agreement +and any licenses granted by Recipient relating to the Program shall continue and +survive. + +Everyone is permitted to copy and distribute copies of this Agreement, but in +order to avoid inconsistency the Agreement is copyrighted and may only be +modified in the following manner. The Agreement Steward reserves the right to +publish new versions (including revisions) of this Agreement from time to time. +No one other than the Agreement Steward has the right to modify this Agreement. +The Eclipse Foundation is the initial Agreement Steward. The Eclipse Foundation +may assign the responsibility to serve as the Agreement Steward to a suitable +separate entity. Each new version of the Agreement will be given a +distinguishing version number. The Program (including Contributions) may always +be distributed subject to the version of the Agreement under which it was +received. In addition, after a new version of the Agreement is published, +Contributor may elect to distribute the Program (including its Contributions) +under the new version. Except as expressly stated in Sections 2(a) and 2(b) +above, Recipient receives no rights or licenses to the intellectual property of +any Contributor under this Agreement, whether expressly, by implication, +estoppel or otherwise. All rights in the Program not expressly granted under +this Agreement are reserved. + +This Agreement is governed by the laws of the State of New York and the +intellectual property laws of the United States of America. No party to this +Agreement will bring a legal action under this Agreement more than one year +after the cause of action arose. Each party waives its rights to a jury trial in +any resulting litigation. + + +For the Windows Installer component: + + * All NSIS source code, plug-ins, documentation, examples, header files and + graphics, with the exception of the compression modules and where + otherwise noted, are licensed under the zlib/libpng license. + * The zlib compression module for NSIS is licensed under the zlib/libpng + license. + * The bzip2 compression module for NSIS is licensed under the bzip2 license. + * The lzma compression module for NSIS is licensed under the Common Public + License version 1.0. + +zlib/libpng license + +This software is provided 'as-is', without any express or implied warranty. In +no event will the authors be held liable for any damages arising from the use of +this software. + +Permission is granted to anyone to use this software for any purpose, including +commercial applications, and to alter it and redistribute it freely, subject to +the following restrictions: + + 1. 
The origin of this software must not be misrepresented; you must not claim + that you wrote the original software. If you use this software in a + product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + +bzip2 license + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. The origin of this software must not be misrepresented; you must not claim + that you wrote the original software. If you use this software in a + product, an acknowledgment in the product documentation would be + appreciated but is not required. + 3. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 4. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +OF SUCH DAMAGE. + +Julian Seward, Cambridge, UK. + +jseward@acm.org +Common Public License version 1.0 + +THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS COMMON PUBLIC +LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM +CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. DEFINITIONS + +"Contribution" means: + +a) in the case of the initial Contributor, the initial code and documentation +distributed under this Agreement, and b) in the case of each subsequent +Contributor: + +i) changes to the Program, and + +ii) additions to the Program; + +where such changes and/or additions to the Program originate from and are +distributed by that particular Contributor. A Contribution 'originates' from a +Contributor if it was added to the Program by such Contributor itself or anyone +acting on such Contributor's behalf. Contributions do not include additions to +the Program which: (i) are separate modules of software distributed in +conjunction with the Program under their own license agreement, and (ii) are not +derivative works of the Program. + +"Contributor" means any person or entity that distributes the Program. + +"Licensed Patents " mean patent claims licensable by a Contributor which are +necessarily infringed by the use or sale of its Contribution alone or when +combined with the Program. + +"Program" means the Contributions distributed in accordance with this Agreement. + +"Recipient" means anyone who receives the Program under this Agreement, +including all Contributors. + +2. 
GRANT OF RIGHTS + +a) Subject to the terms of this Agreement, each Contributor hereby grants +Recipient a non-exclusive, worldwide, royalty-free copyright license to +reproduce, prepare derivative works of, publicly display, publicly perform, +distribute and sublicense the Contribution of such Contributor, if any, and such +derivative works, in source code and object code form. + +b) Subject to the terms of this Agreement, each Contributor hereby grants +Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed +Patents to make, use, sell, offer to sell, import and otherwise transfer the +Contribution of such Contributor, if any, in source code and object code form. +This patent license shall apply to the combination of the Contribution and the +Program if, at the time the Contribution is added by the Contributor, such +addition of the Contribution causes such combination to be covered by the +Licensed Patents. The patent license shall not apply to any other combinations +which include the Contribution. No hardware per se is licensed hereunder. + +c) Recipient understands that although each Contributor grants the licenses to +its Contributions set forth herein, no assurances are provided by any +Contributor that the Program does not infringe the patent or other intellectual +property rights of any other entity. Each Contributor disclaims any liability to +Recipient for claims brought by any other entity based on infringement of +intellectual property rights or otherwise. As a condition to exercising the +rights and licenses granted hereunder, each Recipient hereby assumes sole +responsibility to secure any other intellectual property rights needed, if any. +For example, if a third party patent license is required to allow Recipient to +distribute the Program, it is Recipient's responsibility to acquire that license +before distributing the Program. + +d) Each Contributor represents that to its knowledge it has sufficient copyright +rights in its Contribution, if any, to grant the copyright license set forth in +this Agreement. + +3. REQUIREMENTS + +A Contributor may choose to distribute the Program in object code form under its +own license agreement, provided that: + +a) it complies with the terms and conditions of this Agreement; and + +b) its license agreement: + +i) effectively disclaims on behalf of all Contributors all warranties and +conditions, express and implied, including warranties or conditions of title and +non-infringement, and implied warranties or conditions of merchantability and +fitness for a particular purpose; + +ii) effectively excludes on behalf of all Contributors all liability for +damages, including direct, indirect, special, incidental and consequential +damages, such as lost profits; + +iii) states that any provisions which differ from this Agreement are offered by +that Contributor alone and not by any other party; and + +iv) states that source code for the Program is available from such Contributor, +and informs licensees how to obtain it in a reasonable manner on or through a +medium customarily used for software exchange. + +When the Program is made available in source code form: + +a) it must be made available under this Agreement; and + +b) a copy of this Agreement must be included with each copy of the Program. + +Contributors may not remove or alter any copyright notices contained within the +Program. 
+ +Each Contributor must identify itself as the originator of its Contribution, if +any, in a manner that reasonably allows subsequent Recipients to identify the +originator of the Contribution. + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain responsibilities with +respect to end users, business partners and the like. While this license is +intended to facilitate the commercial use of the Program, the Contributor who +includes the Program in a commercial product offering should do so in a manner +which does not create potential liability for other Contributors. Therefore, if +a Contributor includes the Program in a commercial product offering, such +Contributor ("Commercial Contributor") hereby agrees to defend and indemnify +every other Contributor ("Indemnified Contributor") against any losses, damages +and costs (collectively "Losses") arising from claims, lawsuits and other legal +actions brought by a third party against the Indemnified Contributor to the +extent caused by the acts or omissions of such Commercial Contributor in +connection with its distribution of the Program in a commercial product +offering. The obligations in this section do not apply to any claims or Losses +relating to any actual or alleged intellectual property infringement. In order +to qualify, an Indemnified Contributor must: a) promptly notify the Commercial +Contributor in writing of such claim, and b) allow the Commercial Contributor to +control, and cooperate with the Commercial Contributor in, the defense and any +related settlement negotiations. The Indemnified Contributor may participate in +any such claim at its own expense. + +For example, a Contributor might include the Program in a commercial product +offering, Product X. That Contributor is then a Commercial Contributor. If that +Commercial Contributor then makes performance claims, or offers warranties +related to Product X, those performance claims and warranties are such +Commercial Contributor's responsibility alone. Under this section, the +Commercial Contributor would have to defend claims against the other +Contributors related to those performance claims and warranties, and if a court +requires any other Contributor to pay any damages as a result, the Commercial +Contributor must pay those damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR +IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each +Recipient is solely responsible for determining the appropriateness of using and +distributing the Program and assumes all risks associated with its exercise of +rights under this Agreement, including but not limited to the risks and costs of +program errors, compliance with applicable laws, damage to or loss of data, +programs or equipment, and unavailability or interruption of operations. + +6. 
DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY +CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST +PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS +GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +7. GENERAL + +If any provision of this Agreement is invalid or unenforceable under applicable +law, it shall not affect the validity or enforceability of the remainder of the +terms of this Agreement, and without further action by the parties hereto, such +provision shall be reformed to the minimum extent necessary to make such +provision valid and enforceable. + +If Recipient institutes patent litigation against a Contributor with respect to +a patent applicable to software (including a cross-claim or counterclaim in a +lawsuit), then any patent licenses granted by that Contributor to such Recipient +under this Agreement shall terminate as of the date such litigation is filed. In +addition, if Recipient institutes patent litigation against any entity +(including a cross-claim or counterclaim in a lawsuit) alleging that the Program +itself (excluding combinations of the Program with other software or hardware) +infringes such Recipient's patent(s), then such Recipient's rights granted under +Section 2(b) shall terminate as of the date such litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it fails to +comply with any of the material terms or conditions of this Agreement and does +not cure such failure in a reasonable period of time after becoming aware of +such noncompliance. If all Recipient's rights under this Agreement terminate, +Recipient agrees to cease use and distribution of the Program as soon as +reasonably practicable. However, Recipient's obligations under this Agreement +and any licenses granted by Recipient relating to the Program shall continue and +survive. + +Everyone is permitted to copy and distribute copies of this Agreement, but in +order to avoid inconsistency the Agreement is copyrighted and may only be +modified in the following manner. The Agreement Steward reserves the right to +publish new versions (including revisions) of this Agreement from time to time. +No one other than the Agreement Steward has the right to modify this Agreement. +IBM is the initial Agreement Steward. IBM may assign the responsibility to serve +as the Agreement Steward to a suitable separate entity. Each new version of the +Agreement will be given a distinguishing version number. The Program (including +Contributions) may always be distributed subject to the version of the Agreement +under which it was received. In addition, after a new version of the Agreement +is published, Contributor may elect to distribute the Program (including its +Contributions) under the new version. Except as expressly stated in Sections +2(a) and 2(b) above, Recipient receives no rights or licenses to the +intellectual property of any Contributor under this Agreement, whether +expressly, by implication, estoppel or otherwise. All rights in the Program not +expressly granted under this Agreement are reserved. 
+ +This Agreement is governed by the laws of the State of New York and the +intellectual property laws of the United States of America. No party to this +Agreement will bring a legal action under this Agreement more than one year +after the cause of action arose. Each party waives its rights to a jury trial in +any resulting litigation. + +Special exception for LZMA compression module + +Igor Pavlov and Amir Szekely, the authors of the LZMA compression module for +NSIS, expressly permit you to statically or dynamically link your code (or bind +by name) to the files from the LZMA compression module for NSIS without +subjecting your linked code to the terms of the Common Public license version +1.0. Any modifications or additions to files from the LZMA compression module +for NSIS, however, are subject to the terms of the Common Public License version +1.0. \ No newline at end of file diff --git a/aarch64/share/hadoop/httpfs/tomcat/NOTICE b/aarch64/share/hadoop/httpfs/tomcat/NOTICE new file mode 100644 index 0000000..aaa19b6 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/NOTICE @@ -0,0 +1,16 @@ +Apache Tomcat +Copyright 1999-2012 The Apache Software Foundation + +This product includes software developed by +The Apache Software Foundation (http://www.apache.org/). + +The Windows Installer is built with the Nullsoft +Scriptable Install Sysem (NSIS), which is +open source software. The original software and +related information is available at +http://nsis.sourceforge.net. + +Java compilation software for JSP pages is provided by Eclipse, +which is open source software. The original software and +related information is available at +http://www.eclipse.org. diff --git a/aarch64/share/hadoop/httpfs/tomcat/RELEASE-NOTES b/aarch64/share/hadoop/httpfs/tomcat/RELEASE-NOTES new file mode 100644 index 0000000..a90c104 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/RELEASE-NOTES @@ -0,0 +1,234 @@ +================================================================================ + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+================================================================================ + +$Id: RELEASE-NOTES 1392285 2012-10-01 11:32:00Z kkolinko $ + + + Apache Tomcat Version 6.0.36 + Release Notes + + +============================= +KNOWN ISSUES IN THIS RELEASE: +============================= + +* Dependency Changes +* JNI Based Applications +* Bundled APIs +* Web application reloading and static fields in shared libraries +* Tomcat on Linux +* Enabling SSI and CGI Support +* Security manager URLs +* Symlinking static resources +* Enabling invoker servlet +* Viewing the Tomcat Change Log +* Cryptographic software notice +* When all else fails + + +=================== +Dependency Changes: +=================== +Tomcat 6.0 is designed to run on Java SE 5.0 and later. + +In addition, Tomcat 6.0 uses the Eclipse JDT Java compiler for compiling +JSP pages. This means you no longer need to have the complete +Java Development Kit (JDK) to run Tomcat, but a Java Runtime Environment +(JRE) is sufficient. The Eclipse JDT Java compiler is bundled with the +binary Tomcat distributions. Tomcat can also be configured to use the +compiler from the JDK to compile JSPs, or any other Java compiler supported +by Apache Ant. + + +======================= +JNI Based Applications: +======================= +Applications that require native libraries must ensure that the libraries have +been loaded prior to use. Typically, this is done with a call like: + + static { + System.loadLibrary("path-to-library-file"); + } + +in some class. However, the application must also ensure that the library is +not loaded more than once. If the above code were placed in a class inside +the web application (i.e. under /WEB-INF/classes or /WEB-INF/lib), and the +application were reloaded, the loadLibrary() call would be attempted a second +time. + +To avoid this problem, place classes that load native libraries outside of the +web application, and ensure that the loadLibrary() call is executed only once +during the lifetime of a particular JVM. + + +============= +Bundled APIs: +============= +A standard installation of Tomcat 6.0 makes all of the following APIs available +for use by web applications (by placing them in "lib"): +* annotations-api.jar (Annotations package) +* catalina.jar (Tomcat Catalina implementation) +* catalina-ant.jar (Tomcat Catalina Ant tasks) +* catalina-ha.jar (High availability package) +* catalina-tribes.jar (Group communication) +* ecj-@JDT_VERSION@.jar (Eclipse JDT Java compiler) +* el-api.jar (EL 2.1 API) +* jasper.jar (Jasper 2 Compiler and Runtime) +* jasper-el.jar (Jasper 2 EL implementation) +* jsp-api.jar (JSP 2.1 API) +* servlet-api.jar (Servlet 2.5 API) +* tomcat-coyote.jar (Tomcat connectors and utility classes) +* tomcat-dbcp.jar (package renamed database connection pool based on Commons DBCP) + +You can make additional APIs available to all of your web applications by +putting unpacked classes into a "classes" directory (not created by default), +or by placing them in JAR files in the "lib" directory. + +To override the XML parser implementation or interfaces, use the endorsed +mechanism of the JVM. The default configuration defines JARs located in +"endorsed" as endorsed. + + +================================================================ +Web application reloading and static fields in shared libraries: +================================================================ +Some shared libraries (many are part of the JDK) keep references to objects +instantiated by the web application. 
To avoid class loading related problems +(ClassCastExceptions, messages indicating that the classloader +is stopped, etc.), the shared libraries state should be reinitialized. + +Something which might help is to avoid putting classes which would be +referenced by a shared static field in the web application classloader, +and putting them in the shared classloader instead (JARs should be put in the +"lib" folder, and classes should be put in the "classes" folder). + + +================ +Tomcat on Linux: +================ +GLIBC 2.2 / Linux 2.4 users should define an environment variable: +export LD_ASSUME_KERNEL=2.2.5 + +Redhat Linux 9.0 users should use the following setting to avoid +stability problems: +export LD_ASSUME_KERNEL=2.4.1 + +There are some Linux bugs reported against the NIO sendfile behavior, make sure you +have a JDK that is up to date, or disable sendfile behavior in the Connector.
+6427312: (fc) FileChannel.transferTo() throws IOException "system call interrupted"
+5103988: (fc) FileChannel.transferTo should return -1 for EAGAIN instead throws IOException
+6253145: (fc) FileChannel.transferTo on Linux fails when going beyond 2GB boundary
+6470086: (fc) FileChannel.transferTo(2147483647, 1, channel) cause "Value too large" exception
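+
+For example, if sendfile-related problems are suspected, sendfile can be
+switched off on the sendfile-capable connectors (NIO and APR) through the
+useSendfile attribute in conf/server.xml; the port and protocol values in
+this sketch are only illustrative:
+
+    <Connector port="8080"
+               protocol="org.apache.coyote.http11.Http11NioProtocol"
+               useSendfile="false" />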
    + + +============================= +Enabling SSI and CGI Support: +============================= +Because of the security risks associated with CGI and SSI available +to web applications, these features are disabled by default. + +To enable and configure CGI support, please see the cgi-howto.html page. + +To enable and configue SSI support, please see the ssi-howto.html page. + + +====================== +Security manager URLs: +====================== +In order to grant security permissions to JARs located inside the +web application repository, use URLs of of the following format +in your policy file: + +file:${catalina.base}/webapps/examples/WEB-INF/lib/driver.jar + + +============================ +Symlinking static resources: +============================ +By default, Unix symlinks will not work when used in a web application to link +resources located outside the web application root directory. + +This behavior is optional, and the "allowLinking" flag may be used to disable +the check. + + +========================= +Enabling invoker servlet: +========================= +Starting with Tomcat 4.1.12, the invoker servlet is no longer available by +default in all webapps. Enabling it for all webapps is possible by editing +$CATALINA_HOME/conf/web.xml to uncomment the "/servlet/*" servlet-mapping +definition. + +Using the invoker servlet in a production environment is not recommended and +is unsupported. More details are available on the Tomcat FAQ at +http://tomcat.apache.org/faq/misc.html#invoker. + + +============================== +Viewing the Tomcat Change Log: +============================== +See changelog.html in this directory. + + +============================================ +Multi-byte charset handling bug in Java 1.5: +============================================ +Public versions of Sun/Oracle Java 1.5 are known to have a nasty bug in +implementation of Charset.decode() method for certain character sets. + +For details, test and a list of affected character sets see: + +http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6196991 +https://issues.apache.org/bugzilla/show_bug.cgi?id=52579 + +The UTF-8 charset is not affected by this issue. + + +============================= +Cryptographic software notice +============================= +This distribution includes cryptographic software. The country in +which you currently reside may have restrictions on the import, +possession, use, and/or re-export to another country, of +encryption software. BEFORE using any encryption software, please +check your country's laws, regulations and policies concerning the +import, possession, or use, and re-export of encryption software, to +see if this is permitted. See for more +information. + +The U.S. Government Department of Commerce, Bureau of Industry and +Security (BIS), has classified this software as Export Commodity +Control Number (ECCN) 5D002.C.1, which includes information security +software using or performing cryptographic functions with asymmetric +algorithms. The form and manner of this Apache Software Foundation +distribution makes it eligible for export under the License Exception +ENC Technology Software Unrestricted (TSU) exception (see the BIS +Export Administration Regulations, Section 740.13) for both object +code and source code. 
+ +The following provides more details on the included cryptographic +software: + - Tomcat includes code designed to work with JSSE + - Tomcat includes code designed to work with OpenSSL + + +==================== +When all else fails: +==================== +See the FAQ +http://tomcat.apache.org/faq/ diff --git a/aarch64/share/hadoop/httpfs/tomcat/RUNNING.txt b/aarch64/share/hadoop/httpfs/tomcat/RUNNING.txt new file mode 100644 index 0000000..2e56cf9 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/RUNNING.txt @@ -0,0 +1,454 @@ +================================================================================ + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +================================================================================ + +$Id: RUNNING.txt 1348600 2012-06-10 14:11:54Z kkolinko $ + + =================================================== + Running The Apache Tomcat 6.0 Servlet/JSP Container + =================================================== + +Apache Tomcat 6.0 requires a Java Standard Edition Runtime +Environment (JRE) version 5.0 or later. + +============================= +Running With JRE 5.0 Or Later +============================= + +(1) Download and Install a Java SE Runtime Environment (JRE) + +(1.1) Download a Java SE Runtime Environment (JRE), + release version 5.0 or later, from + http://www.oracle.com/technetwork/java/javase/downloads/index.html + +(1.2) Install the JRE according to the instructions included with the + release. + + You may also use a full Java Development Kit (JDK) rather than just + a JRE. + + +(2) Download and Install Apache Tomcat + +(2.1) Download a binary distribution of Tomcat from: + + http://tomcat.apache.org/ + +(2.2) Unpack the binary distribution so that it resides in its own + directory (conventionally named "apache-tomcat-[version]"). + + For the purposes of the remainder of this document, the name + "CATALINA_HOME" is used to refer to the full pathname of that + directory. + +NOTE: As an alternative to downloading a binary distribution, you can +create your own from the Tomcat source code, as described in +"BUILDING.txt". You can either + + a) Do the full "release" build and find the created distribution in the + "output/release" directory and then proceed with unpacking as above, or + + b) Do a simple build and use the "output/build" directory as + "CATALINA_HOME". Be warned that there are some differences between the + contents of the "output/build" directory and a full "release" + distribution. + + +(3) Configure Environment Variables + +Tomcat is a Java application and does not use environment variables. The +variables are used by the Tomcat startup scripts. The scripts use the variables +to prepare the command that starts Tomcat. 
+ +(3.1) Set CATALINA_HOME (required) and CATALINA_BASE (optional) + +The CATALINA_HOME and CATALINA_BASE environment variables are used to +specify the location of Apache Tomcat and the location of its active +configuration, respectively. + +The CATALINA_HOME environment variable should be set as defined in (2.2) +above. The Tomcat startup scripts have some logic to set this variable +automatically if it is absent (based on the location of the script in +Unixes and on the current directory in Windows), but this logic might not work +in all circumstances. + +The CATALINA_BASE environment variable is optional and is further described +in the "Multiple Tomcat Instances" section below. If it is absent, it defaults +to be equal to CATALINA_HOME. + + +(3.2) Set JRE_HOME or JAVA_HOME (required) + +The JRE_HOME variable is used to specify location of a JRE that is used to +start Tomcat. + +The JAVA_HOME variable is used to specify location of a JDK. It is used instead +of JRE_HOME. + +Using JAVA_HOME provides access to certain additional startup options that +are not allowed when JRE_HOME is used. + +If both JRE_HOME and JAVA_HOME are specified, JRE_HOME is used. + + +(3.3) Other variables (optional) + +Other environment variables exist, besides the four described above. +See the comments at the top of catalina.bat or catalina.sh scripts for +the list and a description of each of them. + +One frequently used variable is CATALINA_OPTS. It allows specification of +additional options for the java command that starts Tomcat. + +See the Java documentation for the options that affect the Java Runtime +Environment. + +See the "System Properties" page in the Tomcat Configuration Reference for +the system properties that are specific to Tomcat. + +A similar variable is JAVA_OPTS. It is used less frequently. It allows +specification of options that are used both to start and to stop Tomcat as well +as for other commands. + +Do not use JAVA_OPTS to specify memory limits. You do not need much memory +for a small process that is used to stop Tomcat. Those settings belong to +CATALINA_OPTS. + +Another frequently used variable is CATALINA_PID (on *nix platforms only). It +specifies the location of the file where process id of the forked Tomcat java +process will be written. This setting is optional. It will enable the +following features: + + - better protection against duplicate start attempts and + - allows forceful termination of Tomcat process when it does not react to + the standard shutdown command. + + +(3.4) setenv script (optional) + +Apart from CATALINA_HOME and CATALINA_BASE, all environment variables can +be specified in the "setenv" script. + +The script is named setenv.bat (Windows) or setenv.sh (*nix). It can be +placed either into CATALINA_BASE/bin or into CATALINA_HOME/bin. The file +has to be readable. + +By default the setenv script file is absent. If the setenv script is +present both in CATALINA_BASE and in CATALINA_HOME, the one in +CATALINA_BASE is used. + +For example, to configure the JRE_HOME and CATALINA_PID variables you can +create the following script file: + +On Windows, %CATALINA_BASE%\bin\setenv.bat: + + set "JRE_HOME=%ProgramFiles%\Java\jre6" + exit /b 0 + +On Unix, $CATALINA_BASE/bin/setenv.sh: + + JRE_HOME=/usr/java/latest + CATALINA_PID="$CATALINA_BASE/tomcat.pid" + +You cannot configure CATALINA_HOME and CATALINA_BASE variables in the +setenv script, because they are used to find that file. 
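+
+For example, assuming a Unix shell and an illustrative unpack location of
+/usr/local/apache-tomcat-6.0.36, both variables can instead be exported in
+the environment before the startup script is invoked:
+
+    export CATALINA_HOME=/usr/local/apache-tomcat-6.0.36
+    export CATALINA_BASE="$HOME/tomcat-base"
+    "$CATALINA_HOME/bin/startup.sh"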
+ + +(4) Start Up Tomcat + +(4.1) Tomcat can be started by executing one of the following commands: + + %CATALINA_HOME%\bin\startup.bat (Windows) + + $CATALINA_HOME/bin/startup.sh (Unix) + + or + + %CATALINA_HOME%\bin\catalina.bat start (Windows) + + $CATALINA_HOME/bin/catalina.sh start (Unix) + +(4.2) After startup, the default web applications included with Tomcat will be + available by visiting: + + http://localhost:8080/ + +(4.3) Further information about configuring and running Tomcat can be found in + the documentation included here, as well as on the Tomcat web site: + + http://tomcat.apache.org/ + + +(5) Shut Down Tomcat + +(5.1) Tomcat can be shut down by executing one of the following commands: + + %CATALINA_HOME%\bin\shutdown.bat (Windows) + + $CATALINA_HOME/bin/shutdown.sh (Unix) + + or + + %CATALINA_HOME%\bin\catalina.bat stop (Windows) + + $CATALINA_HOME/bin/catalina.sh stop (Unix) + +================================================== +Advanced Configuration - Multiple Tomcat Instances +================================================== + +In many circumstances, it is desirable to have a single copy of a Tomcat +binary distribution shared among multiple users on the same server. To make +this possible, you can set the CATALINA_BASE environment variable to the +directory that contains the files for your 'personal' Tomcat instance. + +When running with a separate CATALINA_HOME and CATALINA_BASE, the files +and directories are split as following: + +In CATALINA_BASE: + + * bin - Only the following files: + + * setenv.sh (*nix) or setenv.bat (Windows), + * tomcat-juli.jar + + The setenv scripts were described above. The tomcat-juli library + is documented in the Logging chapter in the User Guide. + + * conf - Server configuration files (including server.xml) + + * lib - Libraries and classes, as explained below + + * logs - Log and output files + + * webapps - Automatically loaded web applications + + * work - Temporary working directories for web applications + + * temp - Directory used by the JVM for temporary files (java.io.tmpdir) + + +In CATALINA_HOME: + + * bin - Startup and shutdown scripts + + The following files will be used only if they are absent in + CATALINA_BASE/bin: + + setenv.sh (*nix), setenv.bat (Windows), tomcat-juli.jar + + * lib - Libraries and classes, as explained below + + * endorsed - Libraries that override standard "Endorsed Standards" + libraries provided by JRE. See Classloading documentation + in the User Guide for details. + + By default this "endorsed" directory is absent. + +In the default configuration the JAR libraries and classes both in +CATALINA_BASE/lib and in CATALINA_HOME/lib will be added to the common +classpath, but the ones in CATALINA_BASE will be added first and thus will +be searched first. + +The idea is that you may leave the standard Tomcat libraries in +CATALINA_HOME/lib and add other ones such as database drivers into +CATALINA_BASE/lib. + +In general it is advised to never share libraries between web applications, +but put them into WEB-INF/lib directories inside the applications. See +Classloading documentation in the User Guide for details. + + +It might be useful to note that the values of CATALINA_HOME and +CATALINA_BASE can be referenced in the XML configuration files processed +by Tomcat as ${catalina.home} and ${catalina.base} respectively. 
+ +For example, the standard manager web application can be kept in +CATALINA_HOME/webapps/manager and loaded into CATALINA_BASE by using +the following trick: + + * Copy the CATALINA_HOME/webapps/manager/META-INF/context.xml + file as CATALINA_BASE/conf/Catalina/localhost/manager.xml + + * Add docBase attribute as shown below. + +The file will look like the following: + + + + + +See Deployer chapter in User Guide and Context and Host chapters in the +Configuration Reference for more information on contexts and web +application deployment. + + +================ +Troubleshooting +================ + +There are only really 2 things likely to go wrong during the stand-alone +Tomcat install: + +(1) The most common hiccup is when another web server (or any process for that + matter) has laid claim to port 8080. This is the default HTTP port that + Tomcat attempts to bind to at startup. To change this, open the file: + + $CATALINA_HOME/conf/server.xml + + and search for '8080'. Change it to a port that isn't in use, and is + greater than 1024, as ports less than or equal to 1024 require superuser + access to bind under UNIX. + + Restart Tomcat and you're in business. Be sure that you replace the "8080" + in the URL you're using to access Tomcat. For example, if you change the + port to 1977, you would request the URL http://localhost:1977/ in your + browser. + +(2) The 'localhost' machine isn't found. This could happen if you're behind a + proxy. If that's the case, make sure the proxy configuration for your + browser knows that you shouldn't be going through the proxy to access the + "localhost". + + In Firefox, this is under Tools/Preferences -> Advanced/Network -> + Connection -> Settings..., and in Internet Explorer it is Tools -> + Internet Options -> Connections -> LAN Settings. + + +==================== +Optional Components +==================== + +The following optional components may be included with the Apache Tomcat binary +distribution. If they are not included, you can install them separately. + + 1. Apache Tomcat Native library + + 2. Apache Commons Daemon service launcher + +Both of them are implemented in C language and as such have to be compiled +into binary code. The binary code will be specific for a platform and CPU +architecture and it must match the Java Runtime Environment executables +that will be used to launch Tomcat. + +The Windows-specific binary distributions of Apache Tomcat include binary +files for these components. On other platforms you would have to look for +binary versions elsewhere or compile them yourself. + +If you are new to Tomcat, do not bother with these components to start with. +If you do use them, do not forget to read their documentation. + + +Apache Tomcat Native library +----------------------------- + +It is a library that allows to use the "Apr" variant of HTTP and AJP +protocol connectors in Apache Tomcat. It is built around OpenSSL and Apache +Portable Runtime (APR) libraries. Those are the same libraries as used by +Apache HTTPD Server project. + +This feature was especially important in the old days when Java performance +was poor. It is less important nowadays, but it is still used and respected +by many. See Tomcat documentation for more details. 
+ +For further reading: + + - Apache Tomcat documentation + + * Documentation for APR/Native library in the Tomcat User's Guide + + http://tomcat.apache.org/tomcat-6.0-doc/apr.html + + * Documentation for the HTTP and AJP protocol connectors in the Tomcat + Configuration Reference + + http://tomcat.apache.org/tomcat-6.0-doc/config/http.html + + http://tomcat.apache.org/tomcat-6.0-doc/config/ajp.html + + - Apache Tomcat Native project home + + http://tomcat.apache.org/native-doc/ + + - Other projects + + * OpenSSL + + http://openssl.org/ + + * Apache Portable Runtime + + http://apr.apache.org/ + + * Apache HTTP Server + + http://httpd.apache.org/ + +To disable Apache Tomcat Native library: + + - To disable Apache Tomcat Native library when it is installed, or + - To remove the warning that is logged during Tomcat startup when the + library is not installed: + + Edit the "conf/server.xml" file and remove "AprLifecycleListener" from + it. + +The binary file of Apache Tomcat Native library is usually named + + - "tcnative-1.dll" on Windows + - "libtcnative-1.so" on *nix systems + + +Apache Commons Daemon +---------------------- + +Apache Commons Daemon project provides wrappers that can be used to +install Apache Tomcat as a service on Windows or as a daemon on *nix +systems. + +The Windows-specific implementation of Apache Commons Daemon is called +"procrun". The *nix-specific one is called "jsvc". + +For further reading: + + - Apache Commons Daemon project + + http://commons.apache.org/daemon/ + + - Apache Tomcat documentation + + * Installing Apache Tomcat + + http://tomcat.apache.org/tomcat-6.0-doc/setup.html + + * Windows service HOW-TO + + http://tomcat.apache.org/tomcat-6.0-doc/windows-service-howto.html + +The binary files of Apache Commons Daemon in Apache Tomcat distributions +for Windows are named: + + - "tomcat6.exe" + - "tomcat6w.exe" + +These files are renamed copies of "prunsrv.exe" and "prunmgr.exe" from +Apache Commons Daemon distribution. The file names have a meaning: they are +used as the service name to register the service in Windows, as well as the +key name to store distinct configuration for this installation of +"procrun". If you would like to install several instances of Tomcat 6.0 +in parallel, you have to further rename those files, using the same naming +scheme. diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/bootstrap.jar b/aarch64/share/hadoop/httpfs/tomcat/bin/bootstrap.jar new file mode 100644 index 0000000..eb8c330 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/bin/bootstrap.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/catalina-tasks.xml b/aarch64/share/hadoop/httpfs/tomcat/bin/catalina-tasks.xml new file mode 100644 index 0000000..8b802ce --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/catalina-tasks.xml @@ -0,0 +1,58 @@ + + + + + + Catalina Ant Manager, JMX and JSPC Tasks + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/catalina.bat b/aarch64/share/hadoop/httpfs/tomcat/bin/catalina.bat new file mode 100644 index 0000000..07de2dc --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/catalina.bat @@ -0,0 +1,286 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. 
+rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +if "%OS%" == "Windows_NT" setlocal +rem --------------------------------------------------------------------------- +rem Start/Stop Script for the CATALINA Server +rem +rem Environment Variable Prerequisites +rem +rem CATALINA_HOME May point at your Catalina "build" directory. +rem +rem CATALINA_BASE (Optional) Base directory for resolving dynamic portions +rem of a Catalina installation. If not present, resolves to +rem the same directory that CATALINA_HOME points to. +rem +rem CATALINA_OPTS (Optional) Java runtime options used when the "start", +rem or "run" command is executed. +rem +rem CATALINA_TMPDIR (Optional) Directory path location of temporary directory +rem the JVM should use (java.io.tmpdir). Defaults to +rem %CATALINA_BASE%\temp. +rem +rem JAVA_HOME Must point at your Java Development Kit installation. +rem Required to run the with the "debug" argument. +rem +rem JRE_HOME Must point at your Java Runtime installation. +rem Defaults to JAVA_HOME if empty. +rem +rem JAVA_OPTS (Optional) Java runtime options used when the "start", +rem "stop", or "run" command is executed. +rem +rem JAVA_ENDORSED_DIRS (Optional) Lists of of semi-colon separated directories +rem containing some jars in order to allow replacement of APIs +rem created outside of the JCP (i.e. DOM and SAX from W3C). +rem It can also be used to update the XML parser implementation. +rem Defaults to $CATALINA_HOME/endorsed. +rem +rem JPDA_TRANSPORT (Optional) JPDA transport used when the "jpda start" +rem command is executed. The default is "dt_socket". +rem +rem JPDA_ADDRESS (Optional) Java runtime options used when the "jpda start" +rem command is executed. The default is 8000. +rem +rem JPDA_SUSPEND (Optional) Java runtime options used when the "jpda start" +rem command is executed. Specifies whether JVM should suspend +rem execution immediately after startup. Default is "n". +rem +rem JPDA_OPTS (Optional) Java runtime options used when the "jpda start" +rem command is executed. If used, JPDA_TRANSPORT, JPDA_ADDRESS, +rem and JPDA_SUSPEND are ignored. Thus, all required jpda +rem options MUST be specified. The default is: +rem +rem -agentlib:jdwp=transport=%JPDA_TRANSPORT%, +rem address=%JPDA_ADDRESS%,server=y,suspend=%JPDA_SUSPEND% +rem +rem LOGGING_CONFIG (Optional) Override Tomcat's logging config file +rem Example (all one line) +rem set LOGGING_CONFIG="-Djava.util.logging.config.file=%CATALINA_BASE%\conf\logging.properties" +rem +rem LOGGING_MANAGER (Optional) Override Tomcat's logging manager +rem Example (all one line) +rem set LOGGING_MANAGER="-Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager" +rem +rem TITLE (Optional) Specify the title of Tomcat window. The default +rem TITLE is Tomcat if it's not specified. 
+rem Example (all one line) +rem set TITLE=Tomcat.Cluster#1.Server#1 [%DATE% %TIME%] +rem +rem +rem +rem $Id: catalina.bat 1146097 2011-07-13 15:25:05Z markt $ +rem --------------------------------------------------------------------------- + +rem Guess CATALINA_HOME if not defined +set "CURRENT_DIR=%cd%" +if not "%CATALINA_HOME%" == "" goto gotHome +set "CATALINA_HOME=%CURRENT_DIR%" +if exist "%CATALINA_HOME%\bin\catalina.bat" goto okHome +cd .. +set "CATALINA_HOME=%cd%" +cd "%CURRENT_DIR%" +:gotHome +if exist "%CATALINA_HOME%\bin\catalina.bat" goto okHome +echo The CATALINA_HOME environment variable is not defined correctly +echo This environment variable is needed to run this program +goto end +:okHome + +rem Copy CATALINA_BASE from CATALINA_HOME if not defined +if not "%CATALINA_BASE%" == "" goto gotBase +set "CATALINA_BASE=%CATALINA_HOME%" +:gotBase + +rem Ensure that any user defined CLASSPATH variables are not used on startup, +rem but allow them to be specified in setenv.bat, in rare case when it is needed. +set CLASSPATH= + +rem Get standard environment variables +if not exist "%CATALINA_BASE%\bin\setenv.bat" goto checkSetenvHome +call "%CATALINA_BASE%\bin\setenv.bat" +goto setenvDone +:checkSetenvHome +if exist "%CATALINA_HOME%\bin\setenv.bat" call "%CATALINA_HOME%\bin\setenv.bat" +:setenvDone + +rem Get standard Java environment variables +if exist "%CATALINA_HOME%\bin\setclasspath.bat" goto okSetclasspath +echo Cannot find "%CATALINA_HOME%\bin\setclasspath.bat" +echo This file is needed to run this program +goto end +:okSetclasspath +set "BASEDIR=%CATALINA_HOME%" +call "%CATALINA_HOME%\bin\setclasspath.bat" %1 +if errorlevel 1 goto end + +if not "%CATALINA_TMPDIR%" == "" goto gotTmpdir +set "CATALINA_TMPDIR=%CATALINA_BASE%\temp" +:gotTmpdir + +rem Add tomcat-juli.jar and bootstrap.jar to classpath +rem tomcat-juli.jar can be over-ridden per instance +rem Note that there are no quotes as we do not want to introduce random +rem quotes into the CLASSPATH +if "%CLASSPATH%" == "" goto emptyClasspath +set "CLASSPATH=%CLASSPATH%;" +:emptyClasspath +if "%CATALINA_BASE%" == "%CATALINA_HOME%" goto juliClasspathHome +if not exist "%CATALINA_BASE%\bin\tomcat-juli.jar" goto juliClasspathHome +set "CLASSPATH=%CLASSPATH%%CATALINA_BASE%\bin\tomcat-juli.jar;%CATALINA_HOME%\bin\bootstrap.jar" +goto juliClasspathDone +:juliClasspathHome +set "CLASSPATH=%CLASSPATH%%CATALINA_HOME%\bin\bootstrap.jar" +:juliClasspathDone + +if not "%LOGGING_CONFIG%" == "" goto noJuliConfig +set LOGGING_CONFIG=-Dnop +if not exist "%CATALINA_BASE%\conf\logging.properties" goto noJuliConfig +set LOGGING_CONFIG=-Djava.util.logging.config.file="%CATALINA_BASE%\conf\logging.properties" +:noJuliConfig +set JAVA_OPTS=%JAVA_OPTS% %LOGGING_CONFIG% + +if not "%LOGGING_MANAGER%" == "" goto noJuliManager +set LOGGING_MANAGER=-Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager +:noJuliManager +set JAVA_OPTS=%JAVA_OPTS% %LOGGING_MANAGER% + +rem ----- Execute The Requested Command --------------------------------------- + +echo Using CATALINA_BASE: "%CATALINA_BASE%" +echo Using CATALINA_HOME: "%CATALINA_HOME%" +echo Using CATALINA_TMPDIR: "%CATALINA_TMPDIR%" +if ""%1"" == ""debug"" goto use_jdk +echo Using JRE_HOME: "%JRE_HOME%" +goto java_dir_displayed +:use_jdk +echo Using JAVA_HOME: "%JAVA_HOME%" +:java_dir_displayed +echo Using CLASSPATH: "%CLASSPATH%" + +set _EXECJAVA=%_RUNJAVA% +set MAINCLASS=org.apache.catalina.startup.Bootstrap +set ACTION=start +set SECURITY_POLICY_FILE= +set DEBUG_OPTS= +set JPDA= + +if not 
""%1"" == ""jpda"" goto noJpda +set JPDA=jpda +if not "%JPDA_TRANSPORT%" == "" goto gotJpdaTransport +set JPDA_TRANSPORT=dt_socket +:gotJpdaTransport +if not "%JPDA_ADDRESS%" == "" goto gotJpdaAddress +set JPDA_ADDRESS=8000 +:gotJpdaAddress +if not "%JPDA_SUSPEND%" == "" goto gotJpdaSuspend +set JPDA_SUSPEND=n +:gotJpdaSuspend +if not "%JPDA_OPTS%" == "" goto gotJpdaOpts +set JPDA_OPTS=-agentlib:jdwp=transport=%JPDA_TRANSPORT%,address=%JPDA_ADDRESS%,server=y,suspend=%JPDA_SUSPEND% +:gotJpdaOpts +shift +:noJpda + +if ""%1"" == ""debug"" goto doDebug +if ""%1"" == ""run"" goto doRun +if ""%1"" == ""start"" goto doStart +if ""%1"" == ""stop"" goto doStop +if ""%1"" == ""version"" goto doVersion + +echo Usage: catalina ( commands ... ) +echo commands: +echo debug Start Catalina in a debugger +echo debug -security Debug Catalina with a security manager +echo jpda start Start Catalina under JPDA debugger +echo run Start Catalina in the current window +echo run -security Start in the current window with security manager +echo start Start Catalina in a separate window +echo start -security Start in a separate window with security manager +echo stop Stop Catalina +echo version What version of tomcat are you running? +goto end + +:doDebug +shift +set _EXECJAVA=%_RUNJDB% +set DEBUG_OPTS=-sourcepath "%CATALINA_HOME%\..\..\java" +if not ""%1"" == ""-security"" goto execCmd +shift +echo Using Security Manager +set "SECURITY_POLICY_FILE=%CATALINA_BASE%\conf\catalina.policy" +goto execCmd + +:doRun +shift +if not ""%1"" == ""-security"" goto execCmd +shift +echo Using Security Manager +set "SECURITY_POLICY_FILE=%CATALINA_BASE%\conf\catalina.policy" +goto execCmd + +:doStart +shift +if not "%OS%" == "Windows_NT" goto noTitle +if "%TITLE%" == "" set TITLE=Tomcat +set _EXECJAVA=start "%TITLE%" %_RUNJAVA% +goto gotTitle +:noTitle +set _EXECJAVA=start %_RUNJAVA% +:gotTitle +if not ""%1"" == ""-security"" goto execCmd +shift +echo Using Security Manager +set "SECURITY_POLICY_FILE=%CATALINA_BASE%\conf\catalina.policy" +goto execCmd + +:doStop +shift +set ACTION=stop +set CATALINA_OPTS= +goto execCmd + +:doVersion +%_EXECJAVA% -classpath "%CATALINA_HOME%\lib\catalina.jar" org.apache.catalina.util.ServerInfo +goto end + + +:execCmd +rem Get remaining unshifted command line arguments and save them in the +set CMD_LINE_ARGS= +:setArgs +if ""%1""=="""" goto doneSetArgs +set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1 +shift +goto setArgs +:doneSetArgs + +rem Execute Java with the applicable properties +if not "%JPDA%" == "" goto doJpda +if not "%SECURITY_POLICY_FILE%" == "" goto doSecurity +%_EXECJAVA% %JAVA_OPTS% %CATALINA_OPTS% %DEBUG_OPTS% -Djava.endorsed.dirs="%JAVA_ENDORSED_DIRS%" -classpath "%CLASSPATH%" -Dcatalina.base="%CATALINA_BASE%" -Dcatalina.home="%CATALINA_HOME%" -Djava.io.tmpdir="%CATALINA_TMPDIR%" %MAINCLASS% %CMD_LINE_ARGS% %ACTION% +goto end +:doSecurity +%_EXECJAVA% %JAVA_OPTS% %CATALINA_OPTS% %DEBUG_OPTS% -Djava.endorsed.dirs="%JAVA_ENDORSED_DIRS%" -classpath "%CLASSPATH%" -Djava.security.manager -Djava.security.policy=="%SECURITY_POLICY_FILE%" -Dcatalina.base="%CATALINA_BASE%" -Dcatalina.home="%CATALINA_HOME%" -Djava.io.tmpdir="%CATALINA_TMPDIR%" %MAINCLASS% %CMD_LINE_ARGS% %ACTION% +goto end +:doJpda +if not "%SECURITY_POLICY_FILE%" == "" goto doSecurityJpda +%_EXECJAVA% %JAVA_OPTS% %CATALINA_OPTS% %JPDA_OPTS% %DEBUG_OPTS% -Djava.endorsed.dirs="%JAVA_ENDORSED_DIRS%" -classpath "%CLASSPATH%" -Dcatalina.base="%CATALINA_BASE%" -Dcatalina.home="%CATALINA_HOME%" -Djava.io.tmpdir="%CATALINA_TMPDIR%" %MAINCLASS% 
%CMD_LINE_ARGS% %ACTION% +goto end +:doSecurityJpda +%_EXECJAVA% %JAVA_OPTS% %CATALINA_OPTS% %JPDA_OPTS% %DEBUG_OPTS% -Djava.endorsed.dirs="%JAVA_ENDORSED_DIRS%" -classpath "%CLASSPATH%" -Djava.security.manager -Djava.security.policy=="%SECURITY_POLICY_FILE%" -Dcatalina.base="%CATALINA_BASE%" -Dcatalina.home="%CATALINA_HOME%" -Djava.io.tmpdir="%CATALINA_TMPDIR%" %MAINCLASS% %CMD_LINE_ARGS% %ACTION% +goto end + +:end diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/catalina.sh b/aarch64/share/hadoop/httpfs/tomcat/bin/catalina.sh new file mode 100755 index 0000000..41cd97c --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/catalina.sh @@ -0,0 +1,506 @@ +#!/bin/sh + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ----------------------------------------------------------------------------- +# Start/Stop Script for the CATALINA Server +# +# Environment Variable Prerequisites +# +# CATALINA_HOME May point at your Catalina "build" directory. +# +# CATALINA_BASE (Optional) Base directory for resolving dynamic portions +# of a Catalina installation. If not present, resolves to +# the same directory that CATALINA_HOME points to. +# +# CATALINA_OUT (Optional) Full path to a file where stdout and stderr +# will be redirected. +# Default is $CATALINA_BASE/logs/catalina.out +# +# CATALINA_OPTS (Optional) Java runtime options used when the "start", +# or "run" command is executed. +# +# CATALINA_TMPDIR (Optional) Directory path location of temporary directory +# the JVM should use (java.io.tmpdir). Defaults to +# $CATALINA_BASE/temp. +# +# JAVA_HOME Must point at your Java Development Kit installation. +# Required to run the with the "debug" argument. +# +# JRE_HOME Must point at your Java Development Kit installation. +# Defaults to JAVA_HOME if empty. +# +# JAVA_OPTS (Optional) Java runtime options used when the "start", +# "stop", or "run" command is executed. +# +# JAVA_ENDORSED_DIRS (Optional) Lists of of colon separated directories +# containing some jars in order to allow replacement of APIs +# created outside of the JCP (i.e. DOM and SAX from W3C). +# It can also be used to update the XML parser implementation. +# Defaults to $CATALINA_HOME/endorsed. +# +# JPDA_TRANSPORT (Optional) JPDA transport used when the "jpda start" +# command is executed. The default is "dt_socket". +# +# JPDA_ADDRESS (Optional) Java runtime options used when the "jpda start" +# command is executed. The default is 8000. +# +# JPDA_SUSPEND (Optional) Java runtime options used when the "jpda start" +# command is executed. Specifies whether JVM should suspend +# execution immediately after startup. Default is "n". +# +# JPDA_OPTS (Optional) Java runtime options used when the "jpda start" +# command is executed. If used, JPDA_TRANSPORT, JPDA_ADDRESS, +# and JPDA_SUSPEND are ignored. 
Thus, all required jpda +# options MUST be specified. The default is: +# +# -agentlib:jdwp=transport=$JPDA_TRANSPORT, +# address=$JPDA_ADDRESS,server=y,suspend=$JPDA_SUSPEND +# +# CATALINA_PID (Optional) Path of the file which should contains the pid +# of catalina startup java process, when start (fork) is used +# +# LOGGING_CONFIG (Optional) Override Tomcat's logging config file +# Example (all one line) +# LOGGING_CONFIG="-Djava.util.logging.config.file=$CATALINA_BASE/conf/logging.properties" +# +# LOGGING_MANAGER (Optional) Override Tomcat's logging manager +# Example (all one line) +# LOGGING_MANAGER="-Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager" +# +# $Id: catalina.sh 1146097 2011-07-13 15:25:05Z markt $ +# ----------------------------------------------------------------------------- + +# OS specific support. $var _must_ be set to either true or false. +cygwin=false +os400=false +darwin=false +case "`uname`" in +CYGWIN*) cygwin=true;; +OS400*) os400=true;; +Darwin*) darwin=true;; +esac + +# resolve links - $0 may be a softlink +PRG="$0" + +while [ -h "$PRG" ]; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`/"$link" + fi +done + +# Get standard environment variables +PRGDIR=`dirname "$PRG"` + +# Only set CATALINA_HOME if not already set +[ -z "$CATALINA_HOME" ] && CATALINA_HOME=`cd "$PRGDIR/.." >/dev/null; pwd` + +# Copy CATALINA_BASE from CATALINA_HOME if not already set +[ -z "$CATALINA_BASE" ] && CATALINA_BASE="$CATALINA_HOME" + +# Ensure that any user defined CLASSPATH variables are not used on startup, +# but allow them to be specified in setenv.sh, in rare case when it is needed. +CLASSPATH= + +if [ -r "$CATALINA_BASE/bin/setenv.sh" ]; then + . "$CATALINA_BASE/bin/setenv.sh" +elif [ -r "$CATALINA_HOME/bin/setenv.sh" ]; then + . "$CATALINA_HOME/bin/setenv.sh" +fi + +# For Cygwin, ensure paths are in UNIX format before anything is touched +if $cygwin; then + [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"` + [ -n "$JRE_HOME" ] && JRE_HOME=`cygpath --unix "$JRE_HOME"` + [ -n "$CATALINA_HOME" ] && CATALINA_HOME=`cygpath --unix "$CATALINA_HOME"` + [ -n "$CATALINA_BASE" ] && CATALINA_BASE=`cygpath --unix "$CATALINA_BASE"` + [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --unix "$CLASSPATH"` +fi + +# For OS400 +if $os400; then + # Set job priority to standard for interactive (interactive - 6) by using + # the interactive priority - 6, the helper threads that respond to requests + # will be running at the same priority as interactive jobs. + COMMAND='chgjob job('$JOBNAME') runpty(6)' + system $COMMAND + + # Enable multi threading + export QIBM_MULTI_THREADED=Y +fi + +# Get standard Java environment variables +if $os400; then + # -r will Only work on the os400 if the files are: + # 1. owned by the user + # 2. owned by the PRIMARY group of the user + # this will not work if the user belongs in secondary groups + BASEDIR="$CATALINA_HOME" + . "$CATALINA_HOME"/bin/setclasspath.sh +else + if [ -r "$CATALINA_HOME"/bin/setclasspath.sh ]; then + BASEDIR="$CATALINA_HOME" + . "$CATALINA_HOME"/bin/setclasspath.sh + else + echo "Cannot find $CATALINA_HOME/bin/setclasspath.sh" + echo "This file is needed to run this program" + exit 1 + fi +fi + +if [ -z "$CATALINA_BASE" ] ; then + CATALINA_BASE="$CATALINA_HOME" +fi + +# Add tomcat-juli.jar and bootstrap.jar to classpath +# tomcat-juli.jar can be over-ridden per instance +if [ ! 
-z "$CLASSPATH" ] ; then + CLASSPATH="$CLASSPATH": +fi +if [ "$CATALINA_BASE" != "$CATALINA_HOME" ] && [ -r "$CATALINA_BASE/bin/tomcat-juli.jar" ] ; then + CLASSPATH="$CLASSPATH""$CATALINA_BASE"/bin/tomcat-juli.jar:"$CATALINA_HOME"/bin/bootstrap.jar +else + CLASSPATH="$CLASSPATH""$CATALINA_HOME"/bin/bootstrap.jar +fi + +if [ -z "$CATALINA_OUT" ] ; then + CATALINA_OUT="$CATALINA_BASE"/logs/catalina.out +fi + +if [ -z "$CATALINA_TMPDIR" ] ; then + # Define the java.io.tmpdir to use for Catalina + CATALINA_TMPDIR="$CATALINA_BASE"/temp +fi + +# Bugzilla 37848: When no TTY is available, don't output to console +have_tty=0 +if [ "`tty`" != "not a tty" ]; then + have_tty=1 +fi + +# For Cygwin, switch paths to Windows format before running java +if $cygwin; then + JAVA_HOME=`cygpath --absolute --windows "$JAVA_HOME"` + JRE_HOME=`cygpath --absolute --windows "$JRE_HOME"` + CATALINA_HOME=`cygpath --absolute --windows "$CATALINA_HOME"` + CATALINA_BASE=`cygpath --absolute --windows "$CATALINA_BASE"` + CATALINA_TMPDIR=`cygpath --absolute --windows "$CATALINA_TMPDIR"` + CLASSPATH=`cygpath --path --windows "$CLASSPATH"` + JAVA_ENDORSED_DIRS=`cygpath --path --windows "$JAVA_ENDORSED_DIRS"` +fi + +# Set juli LogManager config file if it is present and an override has not been issued +if [ -z "$LOGGING_CONFIG" ]; then + if [ -r "$CATALINA_BASE"/conf/logging.properties ]; then + LOGGING_CONFIG="-Djava.util.logging.config.file=$CATALINA_BASE/conf/logging.properties" + else + # Bugzilla 45585 + LOGGING_CONFIG="-Dnop" + fi +fi + +if [ -z "$LOGGING_MANAGER" ]; then + JAVA_OPTS="$JAVA_OPTS -Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager" +else + JAVA_OPTS="$JAVA_OPTS $LOGGING_MANAGER" +fi + +# ----- Execute The Requested Command ----------------------------------------- + +# Bugzilla 37848: only output this if we have a TTY +if [ $have_tty -eq 1 ]; then + echo "Using CATALINA_BASE: $CATALINA_BASE" + echo "Using CATALINA_HOME: $CATALINA_HOME" + echo "Using CATALINA_TMPDIR: $CATALINA_TMPDIR" + if [ "$1" = "debug" ] ; then + echo "Using JAVA_HOME: $JAVA_HOME" + else + echo "Using JRE_HOME: $JRE_HOME" + fi + echo "Using CLASSPATH: $CLASSPATH" + if [ ! 
-z "$CATALINA_PID" ]; then + echo "Using CATALINA_PID: $CATALINA_PID" + fi +fi + +if [ "$1" = "jpda" ] ; then + if [ -z "$JPDA_TRANSPORT" ]; then + JPDA_TRANSPORT="dt_socket" + fi + if [ -z "$JPDA_ADDRESS" ]; then + JPDA_ADDRESS="8000" + fi + if [ -z "$JPDA_SUSPEND" ]; then + JPDA_SUSPEND="n" + fi + if [ -z "$JPDA_OPTS" ]; then + JPDA_OPTS="-agentlib:jdwp=transport=$JPDA_TRANSPORT,address=$JPDA_ADDRESS,server=y,suspend=$JPDA_SUSPEND" + fi + CATALINA_OPTS="$CATALINA_OPTS $JPDA_OPTS" + shift +fi + +if [ "$1" = "debug" ] ; then + if $os400; then + echo "Debug command not available on OS400" + exit 1 + else + shift + if [ "$1" = "-security" ] ; then + if [ $have_tty -eq 1 ]; then + echo "Using Security Manager" + fi + shift + exec "$_RUNJDB" "$LOGGING_CONFIG" $JAVA_OPTS $CATALINA_OPTS \ + -Djava.endorsed.dirs="$JAVA_ENDORSED_DIRS" -classpath "$CLASSPATH" \ + -sourcepath "$CATALINA_HOME"/../../java \ + -Djava.security.manager \ + -Djava.security.policy=="$CATALINA_BASE"/conf/catalina.policy \ + -Dcatalina.base="$CATALINA_BASE" \ + -Dcatalina.home="$CATALINA_HOME" \ + -Djava.io.tmpdir="$CATALINA_TMPDIR" \ + org.apache.catalina.startup.Bootstrap "$@" start + else + exec "$_RUNJDB" "$LOGGING_CONFIG" $JAVA_OPTS $CATALINA_OPTS \ + -Djava.endorsed.dirs="$JAVA_ENDORSED_DIRS" -classpath "$CLASSPATH" \ + -sourcepath "$CATALINA_HOME"/../../java \ + -Dcatalina.base="$CATALINA_BASE" \ + -Dcatalina.home="$CATALINA_HOME" \ + -Djava.io.tmpdir="$CATALINA_TMPDIR" \ + org.apache.catalina.startup.Bootstrap "$@" start + fi + fi + +elif [ "$1" = "run" ]; then + + shift + if [ "$1" = "-security" ] ; then + if [ $have_tty -eq 1 ]; then + echo "Using Security Manager" + fi + shift + exec "$_RUNJAVA" "$LOGGING_CONFIG" $JAVA_OPTS $CATALINA_OPTS \ + -Djava.endorsed.dirs="$JAVA_ENDORSED_DIRS" -classpath "$CLASSPATH" \ + -Djava.security.manager \ + -Djava.security.policy=="$CATALINA_BASE"/conf/catalina.policy \ + -Dcatalina.base="$CATALINA_BASE" \ + -Dcatalina.home="$CATALINA_HOME" \ + -Djava.io.tmpdir="$CATALINA_TMPDIR" \ + org.apache.catalina.startup.Bootstrap "$@" start + else + exec "$_RUNJAVA" "$LOGGING_CONFIG" $JAVA_OPTS $CATALINA_OPTS \ + -Djava.endorsed.dirs="$JAVA_ENDORSED_DIRS" -classpath "$CLASSPATH" \ + -Dcatalina.base="$CATALINA_BASE" \ + -Dcatalina.home="$CATALINA_HOME" \ + -Djava.io.tmpdir="$CATALINA_TMPDIR" \ + org.apache.catalina.startup.Bootstrap "$@" start + fi + +elif [ "$1" = "start" ] ; then + + if [ ! -z "$CATALINA_PID" ]; then + if [ -f "$CATALINA_PID" ]; then + if [ -s "$CATALINA_PID" ]; then + echo "Existing PID file found during start." + if [ -r "$CATALINA_PID" ]; then + PID=`cat "$CATALINA_PID"` + ps -p $PID >/dev/null 2>&1 + if [ $? -eq 0 ] ; then + echo "Tomcat appears to still be running with PID $PID. Start aborted." + exit 1 + else + echo "Removing/clearing stale PID file." + rm -f "$CATALINA_PID" >/dev/null 2>&1 + if [ $? != 0 ]; then + if [ -w "$CATALINA_PID" ]; then + cat /dev/null > "$CATALINA_PID" + else + echo "Unable to remove or clear stale PID file. Start aborted." + exit 1 + fi + fi + fi + else + echo "Unable to read PID file. Start aborted." + exit 1 + fi + else + rm -f "$CATALINA_PID" >/dev/null 2>&1 + if [ $? != 0 ]; then + if [ ! -w "$CATALINA_PID" ]; then + echo "Unable to remove or write to empty PID file. Start aborted." 
+ exit 1 + fi + fi + fi + fi + fi + + shift + touch "$CATALINA_OUT" + if [ "$1" = "-security" ] ; then + if [ $have_tty -eq 1 ]; then + echo "Using Security Manager" + fi + shift + "$_RUNJAVA" "$LOGGING_CONFIG" $JAVA_OPTS $CATALINA_OPTS \ + -Djava.endorsed.dirs="$JAVA_ENDORSED_DIRS" -classpath "$CLASSPATH" \ + -Djava.security.manager \ + -Djava.security.policy=="$CATALINA_BASE"/conf/catalina.policy \ + -Dcatalina.base="$CATALINA_BASE" \ + -Dcatalina.home="$CATALINA_HOME" \ + -Djava.io.tmpdir="$CATALINA_TMPDIR" \ + org.apache.catalina.startup.Bootstrap "$@" start \ + >> "$CATALINA_OUT" 2>&1 & + + else + "$_RUNJAVA" "$LOGGING_CONFIG" $JAVA_OPTS $CATALINA_OPTS \ + -Djava.endorsed.dirs="$JAVA_ENDORSED_DIRS" -classpath "$CLASSPATH" \ + -Dcatalina.base="$CATALINA_BASE" \ + -Dcatalina.home="$CATALINA_HOME" \ + -Djava.io.tmpdir="$CATALINA_TMPDIR" \ + org.apache.catalina.startup.Bootstrap "$@" start \ + >> "$CATALINA_OUT" 2>&1 & + + fi + + if [ ! -z "$CATALINA_PID" ]; then + echo $! > "$CATALINA_PID" + fi + +elif [ "$1" = "stop" ] ; then + + shift + + SLEEP=5 + if [ ! -z "$1" ]; then + echo $1 | grep "[^0-9]" >/dev/null 2>&1 + if [ $? -gt 0 ]; then + SLEEP=$1 + shift + fi + fi + + FORCE=0 + if [ "$1" = "-force" ]; then + shift + FORCE=1 + fi + + if [ ! -z "$CATALINA_PID" ]; then + if [ -f "$CATALINA_PID" ]; then + if [ -s "$CATALINA_PID" ]; then + kill -0 `cat "$CATALINA_PID"` >/dev/null 2>&1 + if [ $? -gt 0 ]; then + echo "PID file found but no matching process was found. Stop aborted." + exit 1 + fi + else + echo "PID file is empty and has been ignored." + fi + else + echo "\$CATALINA_PID was set but the specified file does not exist. Is Tomcat running? Stop aborted." + exit 1 + fi + fi + + "$_RUNJAVA" $JAVA_OPTS \ + -Djava.endorsed.dirs="$JAVA_ENDORSED_DIRS" -classpath "$CLASSPATH" \ + -Dcatalina.base="$CATALINA_BASE" \ + -Dcatalina.home="$CATALINA_HOME" \ + -Djava.io.tmpdir="$CATALINA_TMPDIR" \ + org.apache.catalina.startup.Bootstrap "$@" stop + + if [ ! -z "$CATALINA_PID" ]; then + if [ -f "$CATALINA_PID" ]; then + while [ $SLEEP -ge 0 ]; do + kill -0 `cat "$CATALINA_PID"` >/dev/null 2>&1 + if [ $? -gt 0 ]; then + rm -f "$CATALINA_PID" >/dev/null 2>&1 + if [ $? != 0 ]; then + if [ -w "$CATALINA_PID" ]; then + cat /dev/null > "$CATALINA_PID" + else + echo "Tomcat stopped but the PID file could not be removed or cleared." + fi + fi + break + fi + if [ $SLEEP -gt 0 ]; then + sleep 1 + fi + if [ $SLEEP -eq 0 ]; then + if [ $FORCE -eq 0 ]; then + echo "Tomcat did not stop in time. PID file was not removed." + fi + fi + SLEEP=`expr $SLEEP - 1 ` + done + fi + fi + + if [ $FORCE -eq 1 ]; then + if [ -z "$CATALINA_PID" ]; then + echo "Kill failed: \$CATALINA_PID not set" + else + if [ -f "$CATALINA_PID" ]; then + PID=`cat "$CATALINA_PID"` + echo "Killing Tomcat with the PID: $PID" + kill -9 $PID + rm -f "$CATALINA_PID" >/dev/null 2>&1 + if [ $? != 0 ]; then + echo "Tomcat was killed but the PID file could not be removed." + fi + fi + fi + fi + +elif [ "$1" = "version" ] ; then + + "$_RUNJAVA" \ + -classpath "$CATALINA_HOME/lib/catalina.jar" \ + org.apache.catalina.util.ServerInfo + +else + + echo "Usage: catalina.sh ( commands ... 
)" + echo "commands:" + if $os400; then + echo " debug Start Catalina in a debugger (not available on OS400)" + echo " debug -security Debug Catalina with a security manager (not available on OS400)" + else + echo " debug Start Catalina in a debugger" + echo " debug -security Debug Catalina with a security manager" + fi + echo " jpda start Start Catalina under JPDA debugger" + echo " run Start Catalina in the current window" + echo " run -security Start in the current window with security manager" + echo " start Start Catalina in a separate window" + echo " start -security Start in a separate window with security manager" + echo " stop Stop Catalina, waiting up to 5 seconds for the process to end" + echo " stop n Stop Catalina, waiting up to n seconds for the process to end" + echo " stop -force Stop Catalina, wait up to 5 seconds and then use kill -KILL if still running" + echo " stop n -force Stop Catalina, wait up to n seconds and then use kill -KILL if still running" + echo " version What version of tomcat are you running?" + echo "Note: Waiting for the process to end and use of the -force option require that \$CATALINA_PID is defined" + exit 1 + +fi diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/commons-daemon-native.tar.gz b/aarch64/share/hadoop/httpfs/tomcat/bin/commons-daemon-native.tar.gz new file mode 100644 index 0000000..fcb4da7 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/bin/commons-daemon-native.tar.gz differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/commons-daemon.jar b/aarch64/share/hadoop/httpfs/tomcat/bin/commons-daemon.jar new file mode 100644 index 0000000..868ae2e Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/bin/commons-daemon.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/cpappend.bat b/aarch64/share/hadoop/httpfs/tomcat/bin/cpappend.bat new file mode 100644 index 0000000..b9b90c1 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/cpappend.bat @@ -0,0 +1,35 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. 
+ +rem --------------------------------------------------------------------------- +rem Append to CLASSPATH +rem +rem $Id: cpappend.bat 562770 2007-08-04 22:13:58Z markt $ +rem --------------------------------------------------------------------------- + +rem Process the first argument +if ""%1"" == """" goto end +set CLASSPATH=%CLASSPATH%;%1 +shift + +rem Process the remaining arguments +:setArgs +if ""%1"" == """" goto doneSetArgs +set CLASSPATH=%CLASSPATH% %1 +shift +goto setArgs +:doneSetArgs +:end diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/digest.bat b/aarch64/share/hadoop/httpfs/tomcat/bin/digest.bat new file mode 100644 index 0000000..32ffb3a --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/digest.bat @@ -0,0 +1,56 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +if "%OS%" == "Windows_NT" setlocal +rem --------------------------------------------------------------------------- +rem Script to digest password using the algorithm specified +rem +rem $Id: digest.bat 908749 2010-02-10 23:26:42Z markt $ +rem --------------------------------------------------------------------------- + +rem Guess CATALINA_HOME if not defined +if not "%CATALINA_HOME%" == "" goto gotHome +set CATALINA_HOME=. +if exist "%CATALINA_HOME%\bin\tool-wrapper.bat" goto okHome +set CATALINA_HOME=.. +:gotHome +if exist "%CATALINA_HOME%\bin\tool-wrapper.bat" goto okHome +echo The CATALINA_HOME environment variable is not defined correctly +echo This environment variable is needed to run this program +goto end +:okHome + +set "EXECUTABLE=%CATALINA_HOME%\bin\tool-wrapper.bat" + +rem Check that target executable exists +if exist "%EXECUTABLE%" goto okExec +echo Cannot find "%EXECUTABLE%" +echo This file is needed to run this program +goto end +:okExec + +rem Get remaining unshifted command line arguments and save them in the +set CMD_LINE_ARGS= +:setArgs +if ""%1""=="""" goto doneSetArgs +set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1 +shift +goto setArgs +:doneSetArgs + +call "%EXECUTABLE%" -server org.apache.catalina.realm.RealmBase %CMD_LINE_ARGS% + +:end diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/digest.sh b/aarch64/share/hadoop/httpfs/tomcat/bin/digest.sh new file mode 100755 index 0000000..88fd456 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/digest.sh @@ -0,0 +1,48 @@ +#!/bin/sh + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ----------------------------------------------------------------------------- +# Script to digest password using the algorithm specified +# +# $Id: digest.sh 1130937 2011-06-03 08:27:13Z markt $ +# ----------------------------------------------------------------------------- + +# resolve links - $0 may be a softlink +PRG="$0" + +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`/"$link" + fi +done + +PRGDIR=`dirname "$PRG"` +EXECUTABLE=tool-wrapper.sh + +# Check that target executable exists +if [ ! -x "$PRGDIR"/"$EXECUTABLE" ]; then + echo "Cannot find $PRGDIR/$EXECUTABLE" + echo "The file is absent or does not have execute permission" + echo "This file is needed to run this program" + exit 1 +fi + +exec "$PRGDIR"/"$EXECUTABLE" -server org.apache.catalina.realm.RealmBase "$@" diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/setclasspath.bat b/aarch64/share/hadoop/httpfs/tomcat/bin/setclasspath.bat new file mode 100644 index 0000000..2868cea --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/setclasspath.bat @@ -0,0 +1,82 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +rem --------------------------------------------------------------------------- +rem Set CLASSPATH and Java options +rem +rem $Id: setclasspath.bat 908749 2010-02-10 23:26:42Z markt $ +rem --------------------------------------------------------------------------- + +rem Make sure prerequisite environment variables are set +if not "%JAVA_HOME%" == "" goto gotJdkHome +if not "%JRE_HOME%" == "" goto gotJreHome +echo Neither the JAVA_HOME nor the JRE_HOME environment variable is defined +echo At least one of these environment variable is needed to run this program +goto exit + +:gotJreHome +if not exist "%JRE_HOME%\bin\java.exe" goto noJavaHome +if not exist "%JRE_HOME%\bin\javaw.exe" goto noJavaHome +if not ""%1"" == ""debug"" goto okJavaHome +echo JAVA_HOME should point to a JDK in order to run in debug mode. 
+goto exit + +:gotJdkHome +if not exist "%JAVA_HOME%\bin\java.exe" goto noJavaHome +if not exist "%JAVA_HOME%\bin\javaw.exe" goto noJavaHome +if not exist "%JAVA_HOME%\bin\jdb.exe" goto noJavaHome +if not exist "%JAVA_HOME%\bin\javac.exe" goto noJavaHome +if not "%JRE_HOME%" == "" goto okJavaHome +set "JRE_HOME=%JAVA_HOME%" +goto okJavaHome + +:noJavaHome +echo The JAVA_HOME environment variable is not defined correctly +echo This environment variable is needed to run this program +echo NB: JAVA_HOME should point to a JDK not a JRE +goto exit +:okJavaHome + +if not "%BASEDIR%" == "" goto gotBasedir +echo The BASEDIR environment variable is not defined +echo This environment variable is needed to run this program +goto exit +:gotBasedir +if exist "%BASEDIR%\bin\setclasspath.bat" goto okBasedir +echo The BASEDIR environment variable is not defined correctly +echo This environment variable is needed to run this program +goto exit +:okBasedir + +rem Don't override the endorsed dir if the user has set it previously +if not "%JAVA_ENDORSED_DIRS%" == "" goto gotEndorseddir +rem Set the default -Djava.endorsed.dirs argument +set "JAVA_ENDORSED_DIRS=%BASEDIR%\endorsed" +:gotEndorseddir + +rem Set standard command for invoking Java. +rem Note that NT requires a window name argument when using start. +rem Also note the quoting as JAVA_HOME may contain spaces. +set _RUNJAVA="%JRE_HOME%\bin\java" +set _RUNJDB="%JAVA_HOME%\bin\jdb" + +goto end + +:exit +exit /b 1 + +:end +exit /b 0 diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/setclasspath.sh b/aarch64/share/hadoop/httpfs/tomcat/bin/setclasspath.sh new file mode 100755 index 0000000..b608691 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/setclasspath.sh @@ -0,0 +1,116 @@ +#!/bin/sh + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ----------------------------------------------------------------------------- +# Set CLASSPATH and Java options +# +# $Id: setclasspath.sh 795037 2009-07-17 10:52:16Z markt $ +# ----------------------------------------------------------------------------- + +# Make sure prerequisite environment variables are set +if [ -z "$JAVA_HOME" -a -z "$JRE_HOME" ]; then + # Bugzilla 37284 (reviewed). + if $darwin; then + if [ -d "/System/Library/Frameworks/JavaVM.framework/Versions/CurrentJDK/Home" ]; then + export JAVA_HOME="/System/Library/Frameworks/JavaVM.framework/Versions/CurrentJDK/Home" + fi + else + JAVA_PATH=`which java 2>/dev/null` + if [ "x$JAVA_PATH" != "x" ]; then + JAVA_PATH=`dirname $JAVA_PATH 2>/dev/null` + JRE_HOME=`dirname $JAVA_PATH 2>/dev/null` + fi + if [ "x$JRE_HOME" = "x" ]; then + # XXX: Should we try other locations? 
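+      # Note: as a last resort, if an executable exists at /usr/bin/java the
+      # script assumes /usr as JRE_HOME, which matches the layout used by
+      # most packaged JREs on Linux.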
+ if [ -x /usr/bin/java ]; then + JRE_HOME=/usr + fi + fi + fi + if [ -z "$JAVA_HOME" -a -z "$JRE_HOME" ]; then + echo "Neither the JAVA_HOME nor the JRE_HOME environment variable is defined" + echo "At least one of these environment variable is needed to run this program" + exit 1 + fi +fi +if [ -z "$JAVA_HOME" -a "$1" = "debug" ]; then + echo "JAVA_HOME should point to a JDK in order to run in debug mode." + exit 1 +fi +if [ -z "$JRE_HOME" ]; then + JRE_HOME="$JAVA_HOME" +fi + +# If we're running under jdb, we need a full jdk. +if [ "$1" = "debug" ] ; then + if [ "$os400" = "true" ]; then + if [ ! -x "$JAVA_HOME"/bin/java -o ! -x "$JAVA_HOME"/bin/javac ]; then + echo "The JAVA_HOME environment variable is not defined correctly" + echo "This environment variable is needed to run this program" + echo "NB: JAVA_HOME should point to a JDK not a JRE" + exit 1 + fi + else + if [ ! -x "$JAVA_HOME"/bin/java -o ! -x "$JAVA_HOME"/bin/jdb -o ! -x "$JAVA_HOME"/bin/javac ]; then + echo "The JAVA_HOME environment variable is not defined correctly" + echo "This environment variable is needed to run this program" + echo "NB: JAVA_HOME should point to a JDK not a JRE" + exit 1 + fi + fi +fi +if [ -z "$BASEDIR" ]; then + echo "The BASEDIR environment variable is not defined" + echo "This environment variable is needed to run this program" + exit 1 +fi +if [ ! -x "$BASEDIR"/bin/setclasspath.sh ]; then + if $os400; then + # -x will Only work on the os400 if the files are: + # 1. owned by the user + # 2. owned by the PRIMARY group of the user + # this will not work if the user belongs in secondary groups + eval + else + echo "The BASEDIR environment variable is not defined correctly" + echo "This environment variable is needed to run this program" + exit 1 + fi +fi + +# Don't override the endorsed dir if the user has set it previously +if [ -z "$JAVA_ENDORSED_DIRS" ]; then + # Set the default -Djava.endorsed.dirs argument + JAVA_ENDORSED_DIRS="$BASEDIR"/endorsed +fi + +# OSX hack to CLASSPATH +JIKESPATH= +if [ `uname -s` = "Darwin" ]; then + OSXHACK="/System/Library/Frameworks/JavaVM.framework/Versions/CurrentJDK/Classes" + if [ -d "$OSXHACK" ]; then + for i in "$OSXHACK"/*.jar; do + JIKESPATH="$JIKESPATH":"$i" + done + fi +fi + +# Set standard commands for invoking Java. +_RUNJAVA="$JRE_HOME"/bin/java +if [ "$os400" != "true" ]; then + _RUNJDB="$JAVA_HOME"/bin/jdb +fi diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/shutdown.bat b/aarch64/share/hadoop/httpfs/tomcat/bin/shutdown.bat new file mode 100644 index 0000000..c1843a3 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/shutdown.bat @@ -0,0 +1,59 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. 
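+rem Note: like startup.bat, this script only locates CATALINA_HOME, gathers
+rem any remaining command line arguments and delegates to "catalina.bat stop";
+rem the actual shutdown logic lives in catalina.bat.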
+ +if "%OS%" == "Windows_NT" setlocal +rem --------------------------------------------------------------------------- +rem Stop script for the CATALINA Server +rem +rem $Id: shutdown.bat 908749 2010-02-10 23:26:42Z markt $ +rem --------------------------------------------------------------------------- + +rem Guess CATALINA_HOME if not defined +set "CURRENT_DIR=%cd%" +if not "%CATALINA_HOME%" == "" goto gotHome +set "CATALINA_HOME=%CURRENT_DIR%" +if exist "%CATALINA_HOME%\bin\catalina.bat" goto okHome +cd .. +set "CATALINA_HOME=%cd%" +cd "%CURRENT_DIR%" +:gotHome +if exist "%CATALINA_HOME%\bin\catalina.bat" goto okHome +echo The CATALINA_HOME environment variable is not defined correctly +echo This environment variable is needed to run this program +goto end +:okHome + +set "EXECUTABLE=%CATALINA_HOME%\bin\catalina.bat" + +rem Check that target executable exists +if exist "%EXECUTABLE%" goto okExec +echo Cannot find "%EXECUTABLE%" +echo This file is needed to run this program +goto end +:okExec + +rem Get remaining unshifted command line arguments and save them in the +set CMD_LINE_ARGS= +:setArgs +if ""%1""=="""" goto doneSetArgs +set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1 +shift +goto setArgs +:doneSetArgs + +call "%EXECUTABLE%" stop %CMD_LINE_ARGS% + +:end diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/shutdown.sh b/aarch64/share/hadoop/httpfs/tomcat/bin/shutdown.sh new file mode 100755 index 0000000..92a8a28 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/shutdown.sh @@ -0,0 +1,48 @@ +#!/bin/sh + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ----------------------------------------------------------------------------- +# Stop script for the CATALINA Server +# +# $Id: shutdown.sh 1130937 2011-06-03 08:27:13Z markt $ +# ----------------------------------------------------------------------------- + +# resolve links - $0 may be a softlink +PRG="$0" + +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`/"$link" + fi +done + +PRGDIR=`dirname "$PRG"` +EXECUTABLE=catalina.sh + +# Check that target executable exists +if [ ! -x "$PRGDIR"/"$EXECUTABLE" ]; then + echo "Cannot find $PRGDIR/$EXECUTABLE" + echo "The file is absent or does not have execute permission" + echo "This file is needed to run this program" + exit 1 +fi + +exec "$PRGDIR"/"$EXECUTABLE" stop "$@" diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/startup.bat b/aarch64/share/hadoop/httpfs/tomcat/bin/startup.bat new file mode 100644 index 0000000..6f9a9b8 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/startup.bat @@ -0,0 +1,59 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. 
See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +if "%OS%" == "Windows_NT" setlocal +rem --------------------------------------------------------------------------- +rem Start script for the CATALINA Server +rem +rem $Id: startup.bat 908749 2010-02-10 23:26:42Z markt $ +rem --------------------------------------------------------------------------- + +rem Guess CATALINA_HOME if not defined +set "CURRENT_DIR=%cd%" +if not "%CATALINA_HOME%" == "" goto gotHome +set "CATALINA_HOME=%CURRENT_DIR%" +if exist "%CATALINA_HOME%\bin\catalina.bat" goto okHome +cd .. +set "CATALINA_HOME=%cd%" +cd "%CURRENT_DIR%" +:gotHome +if exist "%CATALINA_HOME%\bin\catalina.bat" goto okHome +echo The CATALINA_HOME environment variable is not defined correctly +echo This environment variable is needed to run this program +goto end +:okHome + +set "EXECUTABLE=%CATALINA_HOME%\bin\catalina.bat" + +rem Check that target executable exists +if exist "%EXECUTABLE%" goto okExec +echo Cannot find "%EXECUTABLE%" +echo This file is needed to run this program +goto end +:okExec + +rem Get remaining unshifted command line arguments and save them in the +set CMD_LINE_ARGS= +:setArgs +if ""%1""=="""" goto doneSetArgs +set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1 +shift +goto setArgs +:doneSetArgs + +call "%EXECUTABLE%" start %CMD_LINE_ARGS% + +:end diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/startup.sh b/aarch64/share/hadoop/httpfs/tomcat/bin/startup.sh new file mode 100755 index 0000000..4702636 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/startup.sh @@ -0,0 +1,65 @@ +#!/bin/sh + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
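+# Illustrative usage (the pid file path is only an example): this script
+# simply execs "catalina.sh start" with any extra arguments, so
+#
+#     CATALINA_PID=/tmp/httpfs-tomcat.pid ./startup.sh -security
+#
+# behaves exactly like "catalina.sh start -security" and writes the new
+# process id to the named file.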
+ +# ----------------------------------------------------------------------------- +# Start Script for the CATALINA Server +# +# $Id: startup.sh 1130937 2011-06-03 08:27:13Z markt $ +# ----------------------------------------------------------------------------- + +# Better OS/400 detection: see Bugzilla 31132 +os400=false +darwin=false +case "`uname`" in +CYGWIN*) cygwin=true;; +OS400*) os400=true;; +Darwin*) darwin=true;; +esac + +# resolve links - $0 may be a softlink +PRG="$0" + +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`/"$link" + fi +done + +PRGDIR=`dirname "$PRG"` +EXECUTABLE=catalina.sh + +# Check that target executable exists +if $os400; then + # -x will Only work on the os400 if the files are: + # 1. owned by the user + # 2. owned by the PRIMARY group of the user + # this will not work if the user belongs in secondary groups + eval +else + if [ ! -x "$PRGDIR"/"$EXECUTABLE" ]; then + echo "Cannot find $PRGDIR/$EXECUTABLE" + echo "The file is absent or does not have execute permission" + echo "This file is needed to run this program" + exit 1 + fi +fi + +exec "$PRGDIR"/"$EXECUTABLE" start "$@" diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/tomcat-juli.jar b/aarch64/share/hadoop/httpfs/tomcat/bin/tomcat-juli.jar new file mode 100644 index 0000000..2fcbafd Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/bin/tomcat-juli.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/tomcat-native.tar.gz b/aarch64/share/hadoop/httpfs/tomcat/bin/tomcat-native.tar.gz new file mode 100644 index 0000000..c6a9452 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/bin/tomcat-native.tar.gz differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/tool-wrapper.bat b/aarch64/share/hadoop/httpfs/tomcat/bin/tool-wrapper.bat new file mode 100644 index 0000000..e1955b8 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/tool-wrapper.bat @@ -0,0 +1,85 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +if "%OS%" == "Windows_NT" setlocal +rem --------------------------------------------------------------------------- +rem Wrapper script for command line tools +rem +rem Environment Variable Prerequisites +rem +rem CATALINA_HOME May point at your Catalina "build" directory. +rem +rem TOOL_OPTS (Optional) Java runtime options used when the "start", +rem "stop", or "run" command is executed. +rem +rem JAVA_HOME Must point at your Java Development Kit installation. +rem +rem JAVA_OPTS (Optional) Java runtime options used when the "start", +rem "stop", or "run" command is executed. 
+rem +rem $Id: tool-wrapper.bat 1040555 2010-11-30 15:00:25Z rjung $ +rem --------------------------------------------------------------------------- + +rem Guess CATALINA_HOME if not defined +if not "%CATALINA_HOME%" == "" goto gotHome +set CATALINA_HOME=. +if exist "%CATALINA_HOME%\bin\tool-wrapper.bat" goto okHome +set CATALINA_HOME=.. +:gotHome +if exist "%CATALINA_HOME%\bin\tool-wrapper.bat" goto okHome +echo The CATALINA_HOME environment variable is not defined correctly +echo This environment variable is needed to run this program +goto end +:okHome + +rem Ensure that any user defined CLASSPATH variables are not used on startup, +rem but allow them to be specified in setenv.bat, in rare case when it is needed. +set CLASSPATH= + +rem Get standard environment variables +if exist "%CATALINA_HOME%\bin\setenv.bat" call "%CATALINA_HOME%\bin\setenv.bat" + +rem Get standard Java environment variables +if exist "%CATALINA_HOME%\bin\setclasspath.bat" goto okSetclasspath +echo Cannot find "%CATALINA_HOME%\bin\setclasspath.bat" +echo This file is needed to run this program +goto end +:okSetclasspath +set "BASEDIR=%CATALINA_HOME%" +call "%CATALINA_HOME%\bin\setclasspath.bat" + +rem Add on extra jar files to CLASSPATH +rem Note that there are no quotes as we do not want to introduce random +rem quotes into the CLASSPATH +if "%CLASSPATH%" == "" goto noclasspath +set "CLASSPATH=%CLASSPATH%;%CATALINA_HOME%\bin\bootstrap.jar;%BASEDIR%\lib\servlet-api.jar" +goto okclasspath +:noclasspath +set "CLASSPATH=%CATALINA_HOME%\bin\bootstrap.jar;%BASEDIR%\lib\servlet-api.jar" +:okclasspath + +rem Get remaining unshifted command line arguments and save them in the +set CMD_LINE_ARGS= +:setArgs +if ""%1""=="""" goto doneSetArgs +set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1 +shift +goto setArgs +:doneSetArgs + +%_RUNJAVA% %JAVA_OPTS% %TOOL_OPTS% -Djava.endorsed.dirs="%JAVA_ENDORSED_DIRS%" -classpath "%CLASSPATH%" -Dcatalina.home="%CATALINA_HOME%" org.apache.catalina.startup.Tool %CMD_LINE_ARGS% + +:end diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/tool-wrapper.sh b/aarch64/share/hadoop/httpfs/tomcat/bin/tool-wrapper.sh new file mode 100755 index 0000000..11ca986 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/tool-wrapper.sh @@ -0,0 +1,99 @@ +#!/bin/sh + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ----------------------------------------------------------------------------- +# Wrapper script for command line tools +# +# Environment Variable Prerequisites +# +# CATALINA_HOME May point at your Catalina "build" directory. +# +# TOOL_OPTS (Optional) Java runtime options used when the "start", +# "stop", or "run" command is executed. +# +# JAVA_HOME Must point at your Java Development Kit installation. 
+# +# JAVA_OPTS (Optional) Java runtime options used when the "start", +# "stop", or "run" command is executed. +# +# $Id: tool-wrapper.sh 1040555 2010-11-30 15:00:25Z rjung $ +# ----------------------------------------------------------------------------- + +# OS specific support. $var _must_ be set to either true or false. +cygwin=false +case "`uname`" in +CYGWIN*) cygwin=true;; +esac + +# resolve links - $0 may be a softlink +PRG="$0" + +while [ -h "$PRG" ]; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`/"$link" + fi +done + +# Get standard environment variables +PRGDIR=`dirname "$PRG"` +CATALINA_HOME=`cd "$PRGDIR/.." >/dev/null; pwd` + +# Ensure that any user defined CLASSPATH variables are not used on startup, +# but allow them to be specified in setenv.sh, in rare case when it is needed. +CLASSPATH= + +if [ -r "$CATALINA_HOME"/bin/setenv.sh ]; then + . "$CATALINA_HOME"/bin/setenv.sh +fi + +# For Cygwin, ensure paths are in UNIX format before anything is touched +if $cygwin; then + [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"` + [ -n "$CATALINA_HOME" ] && CATALINA_HOME=`cygpath --unix "$CATALINA_HOME"` + [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --unix "$CLASSPATH"` +fi + +# Get standard Java environment variables +if [ -r "$CATALINA_HOME"/bin/setclasspath.sh ]; then + BASEDIR="$CATALINA_HOME" + . "$CATALINA_HOME"/bin/setclasspath.sh +else + echo "Cannot find $CATALINA_HOME/bin/setclasspath.sh" + echo "This file is needed to run this program" + exit 1 +fi + +# Add on extra jar files to CLASSPATH +CLASSPATH="$CLASSPATH":"$CATALINA_HOME"/bin/bootstrap.jar:"$BASEDIR"/lib/servlet-api.jar + +# For Cygwin, switch paths to Windows format before running java +if $cygwin; then + JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"` + CATALINA_HOME=`cygpath --path --windows "$CATALINA_HOME"` + CLASSPATH=`cygpath --path --windows "$CLASSPATH"` +fi + +# ----- Execute The Requested Command ----------------------------------------- + +exec "$_RUNJAVA" $JAVA_OPTS $TOOL_OPTS \ + -Djava.endorsed.dirs="$JAVA_ENDORSED_DIRS" -classpath "$CLASSPATH" \ + -Dcatalina.home="$CATALINA_HOME" \ + org.apache.catalina.startup.Tool "$@" diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/version.bat b/aarch64/share/hadoop/httpfs/tomcat/bin/version.bat new file mode 100644 index 0000000..8197fd6 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/version.bat @@ -0,0 +1,59 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. 
+ +if "%OS%" == "Windows_NT" setlocal +rem --------------------------------------------------------------------------- +rem Version script for the CATALINA Server +rem +rem $Id: version.bat 908749 2010-02-10 23:26:42Z markt $ +rem --------------------------------------------------------------------------- + +rem Guess CATALINA_HOME if not defined +set "CURRENT_DIR=%cd%" +if not "%CATALINA_HOME%" == "" goto gotHome +set "CATALINA_HOME=%CURRENT_DIR%" +if exist "%CATALINA_HOME%\bin\catalina.bat" goto okHome +cd .. +set "CATALINA_HOME=%cd%" +cd "%CURRENT_DIR%" +:gotHome +if exist "%CATALINA_HOME%\bin\catalina.bat" goto okHome +echo The CATALINA_HOME environment variable is not defined correctly +echo This environment variable is needed to run this program +goto end +:okHome + +set "EXECUTABLE=%CATALINA_HOME%\bin\catalina.bat" + +rem Check that target executable exists +if exist "%EXECUTABLE%" goto okExec +echo Cannot find "%EXECUTABLE%" +echo This file is needed to run this program +goto end +:okExec + +rem Get remaining unshifted command line arguments and save them in the +set CMD_LINE_ARGS= +:setArgs +if ""%1""=="""" goto doneSetArgs +set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1 +shift +goto setArgs +:doneSetArgs + +call "%EXECUTABLE%" version %CMD_LINE_ARGS% + +:end diff --git a/aarch64/share/hadoop/httpfs/tomcat/bin/version.sh b/aarch64/share/hadoop/httpfs/tomcat/bin/version.sh new file mode 100755 index 0000000..1b45474 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/bin/version.sh @@ -0,0 +1,48 @@ +#!/bin/sh + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ----------------------------------------------------------------------------- +# Version Script for the CATALINA Server +# +# $Id: version.sh 1130937 2011-06-03 08:27:13Z markt $ +# ----------------------------------------------------------------------------- + +# resolve links - $0 may be a softlink +PRG="$0" + +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`/"$link" + fi +done + +PRGDIR=`dirname "$PRG"` +EXECUTABLE=catalina.sh + +# Check that target executable exists +if [ ! 
-x "$PRGDIR"/"$EXECUTABLE" ]; then + echo "Cannot find $PRGDIR/$EXECUTABLE" + echo "The file is absent or does not have execute permission" + echo "This file is needed to run this program" + exit 1 +fi + +exec "$PRGDIR"/"$EXECUTABLE" version "$@" diff --git a/aarch64/share/hadoop/httpfs/tomcat/conf/catalina.policy b/aarch64/share/hadoop/httpfs/tomcat/conf/catalina.policy new file mode 100644 index 0000000..e58d154 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/conf/catalina.policy @@ -0,0 +1,222 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// ============================================================================ +// catalina.policy - Security Policy Permissions for Tomcat 6 +// +// This file contains a default set of security policies to be enforced (by the +// JVM) when Catalina is executed with the "-security" option. In addition +// to the permissions granted here, the following additional permissions are +// granted to the codebase specific to each web application: +// +// * Read access to its document root directory +// * Read, write and delete access to its working directory +// +// $Id: catalina.policy 1135491 2011-06-14 11:27:38Z markt $ +// ============================================================================ + + +// ========== SYSTEM CODE PERMISSIONS ========================================= + + +// These permissions apply to javac +grant codeBase "file:${java.home}/lib/-" { + permission java.security.AllPermission; +}; + +// These permissions apply to all shared system extensions +grant codeBase "file:${java.home}/jre/lib/ext/-" { + permission java.security.AllPermission; +}; + +// These permissions apply to javac when ${java.home] points at $JAVA_HOME/jre +grant codeBase "file:${java.home}/../lib/-" { + permission java.security.AllPermission; +}; + +// These permissions apply to all shared system extensions when +// ${java.home} points at $JAVA_HOME/jre +grant codeBase "file:${java.home}/lib/ext/-" { + permission java.security.AllPermission; +}; + + +// ========== CATALINA CODE PERMISSIONS ======================================= + + +// These permissions apply to the daemon code +grant codeBase "file:${catalina.home}/bin/commons-daemon.jar" { + permission java.security.AllPermission; +}; + +// These permissions apply to the logging API +// Note: If tomcat-juli.jar is in ${catalina.base} and not in ${catalina.home}, +// update this section accordingly. 
+// grant codeBase "file:${catalina.base}/bin/tomcat-juli.jar" {..} +grant codeBase "file:${catalina.home}/bin/tomcat-juli.jar" { + permission java.io.FilePermission + "${java.home}${file.separator}lib${file.separator}logging.properties", "read"; + + permission java.io.FilePermission + "${catalina.base}${file.separator}conf${file.separator}logging.properties", "read"; + permission java.io.FilePermission + "${catalina.base}${file.separator}logs", "read, write"; + permission java.io.FilePermission + "${catalina.base}${file.separator}logs${file.separator}*", "read, write"; + + permission java.lang.RuntimePermission "shutdownHooks"; + permission java.lang.RuntimePermission "getClassLoader"; + permission java.lang.RuntimePermission "setContextClassLoader"; + + permission java.util.logging.LoggingPermission "control"; + + permission java.util.PropertyPermission "java.util.logging.config.class", "read"; + permission java.util.PropertyPermission "java.util.logging.config.file", "read"; + permission java.util.PropertyPermission "catalina.base", "read"; + + // Note: To enable per context logging configuration, permit read access to + // the appropriate file. Be sure that the logging configuration is + // secure before enabling such access. + // E.g. for the examples web application (uncomment and unwrap + // the following to be on a single line): + // permission java.io.FilePermission "${catalina.base}${file.separator} + // webapps${file.separator}examples${file.separator}WEB-INF + // ${file.separator}classes${file.separator}logging.properties", "read"; +}; + +// These permissions apply to the server startup code +grant codeBase "file:${catalina.home}/bin/bootstrap.jar" { + permission java.security.AllPermission; +}; + +// These permissions apply to the servlet API classes +// and those that are shared across all class loaders +// located in the "lib" directory +grant codeBase "file:${catalina.home}/lib/-" { + permission java.security.AllPermission; +}; + + +// If using a per instance lib directory, i.e. ${catalina.base}/lib, +// then the following permission will need to be uncommented +// grant codeBase "file:${catalina.base}/lib/-" { +// permission java.security.AllPermission; +// }; + + +// ========== WEB APPLICATION PERMISSIONS ===================================== + + +// These permissions are granted by default to all web applications +// In addition, a web application will be given a read FilePermission +// and JndiPermission for all files and directories in its document root. 
+grant { + // Required for JNDI lookup of named JDBC DataSource's and + // javamail named MimePart DataSource used to send mail + permission java.util.PropertyPermission "java.home", "read"; + permission java.util.PropertyPermission "java.naming.*", "read"; + permission java.util.PropertyPermission "javax.sql.*", "read"; + + // OS Specific properties to allow read access + permission java.util.PropertyPermission "os.name", "read"; + permission java.util.PropertyPermission "os.version", "read"; + permission java.util.PropertyPermission "os.arch", "read"; + permission java.util.PropertyPermission "file.separator", "read"; + permission java.util.PropertyPermission "path.separator", "read"; + permission java.util.PropertyPermission "line.separator", "read"; + + // JVM properties to allow read access + permission java.util.PropertyPermission "java.version", "read"; + permission java.util.PropertyPermission "java.vendor", "read"; + permission java.util.PropertyPermission "java.vendor.url", "read"; + permission java.util.PropertyPermission "java.class.version", "read"; + permission java.util.PropertyPermission "java.specification.version", "read"; + permission java.util.PropertyPermission "java.specification.vendor", "read"; + permission java.util.PropertyPermission "java.specification.name", "read"; + + permission java.util.PropertyPermission "java.vm.specification.version", "read"; + permission java.util.PropertyPermission "java.vm.specification.vendor", "read"; + permission java.util.PropertyPermission "java.vm.specification.name", "read"; + permission java.util.PropertyPermission "java.vm.version", "read"; + permission java.util.PropertyPermission "java.vm.vendor", "read"; + permission java.util.PropertyPermission "java.vm.name", "read"; + + // Required for OpenJMX + permission java.lang.RuntimePermission "getAttribute"; + + // Allow read of JAXP compliant XML parser debug + permission java.util.PropertyPermission "jaxp.debug", "read"; + + // Precompiled JSPs need access to these packages. + permission java.lang.RuntimePermission "accessClassInPackage.org.apache.jasper.el"; + permission java.lang.RuntimePermission "accessClassInPackage.org.apache.jasper.runtime"; + permission java.lang.RuntimePermission "accessClassInPackage.org.apache.jasper.runtime.*"; + + // Precompiled JSPs need access to these system properties. + permission java.util.PropertyPermission + "org.apache.jasper.runtime.BodyContentImpl.LIMIT_BUFFER", "read"; + permission java.util.PropertyPermission "org.apache.el.parser.COERCE_TO_ZERO", "read"; +}; + + +// The Manager application needs access to the following packages to support the +// session display functionality. 
These settings support the following +// configurations: +// - default CATALINA_HOME == CATALINA_BASE +// - CATALINA_HOME != CATALINA_BASE, per instance Manager in CATALINA_BASE +// - CATALINA_HOME != CATALINA_BASE, shared Manager in CATALINA_HOME +grant codeBase "file:${catalina.base}/webapps/manager/-" { + permission java.lang.RuntimePermission "accessClassInPackage.org.apache.catalina"; + permission java.lang.RuntimePermission "accessClassInPackage.org.apache.catalina.manager"; + permission java.lang.RuntimePermission "accessClassInPackage.org.apache.catalina.manager.util"; +}; +grant codeBase "file:${catalina.home}/webapps/manager/-" { + permission java.lang.RuntimePermission "accessClassInPackage.org.apache.catalina"; + permission java.lang.RuntimePermission "accessClassInPackage.org.apache.catalina.manager"; + permission java.lang.RuntimePermission "accessClassInPackage.org.apache.catalina.manager.util"; +}; + +// You can assign additional permissions to particular web applications by +// adding additional "grant" entries here, based on the code base for that +// application, /WEB-INF/classes/, or /WEB-INF/lib/ jar files. +// +// Different permissions can be granted to JSP pages, classes loaded from +// the /WEB-INF/classes/ directory, all jar files in the /WEB-INF/lib/ +// directory, or even to individual jar files in the /WEB-INF/lib/ directory. +// +// For instance, assume that the standard "examples" application +// included a JDBC driver that needed to establish a network connection to the +// corresponding database and used the scrape taglib to get the weather from +// the NOAA web server. You might create a "grant" entries like this: +// +// The permissions granted to the context root directory apply to JSP pages. +// grant codeBase "file:${catalina.base}/webapps/examples/-" { +// permission java.net.SocketPermission "dbhost.mycompany.com:5432", "connect"; +// permission java.net.SocketPermission "*.noaa.gov:80", "connect"; +// }; +// +// The permissions granted to the context WEB-INF/classes directory +// grant codeBase "file:${catalina.base}/webapps/examples/WEB-INF/classes/-" { +// }; +// +// The permission granted to your JDBC driver +// grant codeBase "jar:file:${catalina.base}/webapps/examples/WEB-INF/lib/driver.jar!/-" { +// permission java.net.SocketPermission "dbhost.mycompany.com:5432", "connect"; +// }; +// The permission granted to the scrape taglib +// grant codeBase "jar:file:${catalina.base}/webapps/examples/WEB-INF/lib/scrape.jar!/-" { +// permission java.net.SocketPermission "*.noaa.gov:80", "connect"; +// }; + diff --git a/aarch64/share/hadoop/httpfs/tomcat/conf/catalina.properties b/aarch64/share/hadoop/httpfs/tomcat/conf/catalina.properties new file mode 100644 index 0000000..dc2db35 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/conf/catalina.properties @@ -0,0 +1,81 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# +# List of comma-separated packages that start with or equal this string +# will cause a security exception to be thrown when +# passed to checkPackageAccess unless the +# corresponding RuntimePermission ("accessClassInPackage."+package) has +# been granted. +package.access=sun.,org.apache.catalina.,org.apache.coyote.,org.apache.tomcat.,org.apache.jasper.,sun.beans. +# +# List of comma-separated packages that start with or equal this string +# will cause a security exception to be thrown when +# passed to checkPackageDefinition unless the +# corresponding RuntimePermission ("defineClassInPackage."+package) has +# been granted. +# +# by default, no packages are restricted for definition, and none of +# the class loaders supplied with the JDK call checkPackageDefinition. +# +package.definition=sun.,java.,org.apache.catalina.,org.apache.coyote.,org.apache.tomcat.,org.apache.jasper. + +# +# +# List of comma-separated paths defining the contents of the "common" +# classloader. Prefixes should be used to define what is the repository type. +# Path may be relative to the CATALINA_HOME or CATALINA_BASE path or absolute. +# If left as blank,the JVM system loader will be used as Catalina's "common" +# loader. +# Examples: +# "foo": Add this folder as a class repository +# "foo/*.jar": Add all the JARs of the specified folder as class +# repositories +# "foo/bar.jar": Add bar.jar as a class repository +common.loader=${catalina.base}/lib,${catalina.base}/lib/*.jar,${catalina.home}/lib,${catalina.home}/lib/*.jar + +# +# List of comma-separated paths defining the contents of the "server" +# classloader. Prefixes should be used to define what is the repository type. +# Path may be relative to the CATALINA_HOME or CATALINA_BASE path or absolute. +# If left as blank, the "common" loader will be used as Catalina's "server" +# loader. +# Examples: +# "foo": Add this folder as a class repository +# "foo/*.jar": Add all the JARs of the specified folder as class +# repositories +# "foo/bar.jar": Add bar.jar as a class repository +server.loader= + +# +# List of comma-separated paths defining the contents of the "shared" +# classloader. Prefixes should be used to define what is the repository type. +# Path may be relative to the CATALINA_BASE path or absolute. If left as blank, +# the "common" loader will be used as Catalina's "shared" loader. +# Examples: +# "foo": Add this folder as a class repository +# "foo/*.jar": Add all the JARs of the specified folder as class +# repositories +# "foo/bar.jar": Add bar.jar as a class repository +# Please note that for single jars, e.g. bar.jar, you need the URL form +# starting with file:. +shared.loader= + +# +# String cache configuration. 
+tomcat.util.buf.StringCache.byte.enabled=true +#tomcat.util.buf.StringCache.char.enabled=true +#tomcat.util.buf.StringCache.trainThreshold=500000 +#tomcat.util.buf.StringCache.cacheSize=5000 diff --git a/aarch64/share/hadoop/httpfs/tomcat/conf/context.xml b/aarch64/share/hadoop/httpfs/tomcat/conf/context.xml new file mode 100644 index 0000000..90bf554 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/conf/context.xml @@ -0,0 +1,35 @@ + + + + + + + WEB-INF/web.xml + + + + + + + + \ No newline at end of file diff --git a/aarch64/share/hadoop/httpfs/tomcat/conf/logging.properties b/aarch64/share/hadoop/httpfs/tomcat/conf/logging.properties new file mode 100644 index 0000000..294ef74 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/conf/logging.properties @@ -0,0 +1,67 @@ +# +# All Rights Reserved. +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +handlers = 1catalina.org.apache.juli.FileHandler, 2localhost.org.apache.juli.FileHandler, 3manager.org.apache.juli.FileHandler, 4host-manager.org.apache.juli.FileHandler, java.util.logging.ConsoleHandler + +.handlers = 1catalina.org.apache.juli.FileHandler, java.util.logging.ConsoleHandler + +############################################################ +# Handler specific properties. +# Describes specific configuration info for Handlers. +############################################################ + +1catalina.org.apache.juli.FileHandler.level = FINE +1catalina.org.apache.juli.FileHandler.directory = ${httpfs.log.dir} +1catalina.org.apache.juli.FileHandler.prefix = httpfs-catalina. + +2localhost.org.apache.juli.FileHandler.level = FINE +2localhost.org.apache.juli.FileHandler.directory = ${httpfs.log.dir} +2localhost.org.apache.juli.FileHandler.prefix = httpfs-localhost. + +3manager.org.apache.juli.FileHandler.level = FINE +3manager.org.apache.juli.FileHandler.directory = ${httpfs.log.dir} +3manager.org.apache.juli.FileHandler.prefix = httpfs-manager. + +4host-manager.org.apache.juli.FileHandler.level = FINE +4host-manager.org.apache.juli.FileHandler.directory = ${httpfs.log.dir} +4host-manager.org.apache.juli.FileHandler.prefix = httpfs-host-manager. + +java.util.logging.ConsoleHandler.level = FINE +java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter + + +############################################################ +# Facility specific properties. +# Provides extra control for each logger. 
+############################################################ + +org.apache.catalina.core.ContainerBase.[Catalina].[localhost].level = INFO +org.apache.catalina.core.ContainerBase.[Catalina].[localhost].handlers = 2localhost.org.apache.juli.FileHandler + +org.apache.catalina.core.ContainerBase.[Catalina].[localhost].[/manager].level = INFO +org.apache.catalina.core.ContainerBase.[Catalina].[localhost].[/manager].handlers = 3manager.org.apache.juli.FileHandler + +org.apache.catalina.core.ContainerBase.[Catalina].[localhost].[/host-manager].level = INFO +org.apache.catalina.core.ContainerBase.[Catalina].[localhost].[/host-manager].handlers = 4host-manager.org.apache.juli.FileHandler + +# For example, set the com.xyz.foo logger to only log SEVERE +# messages: +#org.apache.catalina.startup.ContextConfig.level = FINE +#org.apache.catalina.startup.HostConfig.level = FINE +#org.apache.catalina.session.ManagerBase.level = FINE +#org.apache.catalina.core.AprLifecycleListener.level=FINE diff --git a/aarch64/share/hadoop/httpfs/tomcat/conf/server.xml b/aarch64/share/hadoop/httpfs/tomcat/conf/server.xml new file mode 100644 index 0000000..a425bdd --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/conf/server.xml @@ -0,0 +1,150 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/aarch64/share/hadoop/httpfs/tomcat/conf/tomcat-users.xml b/aarch64/share/hadoop/httpfs/tomcat/conf/tomcat-users.xml new file mode 100644 index 0000000..7f022ff --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/conf/tomcat-users.xml @@ -0,0 +1,36 @@ + + + + + + + diff --git a/aarch64/share/hadoop/httpfs/tomcat/conf/web.xml b/aarch64/share/hadoop/httpfs/tomcat/conf/web.xml new file mode 100644 index 0000000..165bc7c --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/conf/web.xml @@ -0,0 +1,1249 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + default + org.apache.catalina.servlets.DefaultServlet + + debug + 0 + + + listings + false + + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + jsp + org.apache.jasper.servlet.JspServlet + + fork + false + + + xpoweredBy + false + + 3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + default + / + + + + + + + + jsp + *.jsp + + + + jsp + *.jspx + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 30 + + + + + + + + + + + + abs + audio/x-mpeg + + + ai + application/postscript + + + aif + audio/x-aiff + + + aifc + audio/x-aiff + + + aiff + audio/x-aiff + + + aim + application/x-aim + + + art + image/x-jg + + + asf + video/x-ms-asf + + + asx + video/x-ms-asf + + + au + audio/basic + + + avi + video/x-msvideo + + + avx + video/x-rad-screenplay + + + bcpio + application/x-bcpio + + + bin + application/octet-stream + + + bmp + image/bmp + + + body + text/html + + + cdf + application/x-cdf + + + cer + application/x-x509-ca-cert + + + class + application/java + + + cpio + application/x-cpio + + + csh + application/x-csh + + + css + text/css + + + dib + image/bmp 
+ + + doc + application/msword + + + dtd + application/xml-dtd + + + dv + video/x-dv + + + dvi + application/x-dvi + + + eps + application/postscript + + + etx + text/x-setext + + + exe + application/octet-stream + + + gif + image/gif + + + gtar + application/x-gtar + + + gz + application/x-gzip + + + hdf + application/x-hdf + + + hqx + application/mac-binhex40 + + + htc + text/x-component + + + htm + text/html + + + html + text/html + + + hqx + application/mac-binhex40 + + + ief + image/ief + + + jad + text/vnd.sun.j2me.app-descriptor + + + jar + application/java-archive + + + java + text/plain + + + jnlp + application/x-java-jnlp-file + + + jpe + image/jpeg + + + jpeg + image/jpeg + + + jpg + image/jpeg + + + js + text/javascript + + + jsf + text/plain + + + jspf + text/plain + + + kar + audio/x-midi + + + latex + application/x-latex + + + m3u + audio/x-mpegurl + + + mac + image/x-macpaint + + + man + application/x-troff-man + + + mathml + application/mathml+xml + + + me + application/x-troff-me + + + mid + audio/x-midi + + + midi + audio/x-midi + + + mif + application/x-mif + + + mov + video/quicktime + + + movie + video/x-sgi-movie + + + mp1 + audio/x-mpeg + + + mp2 + audio/x-mpeg + + + mp3 + audio/x-mpeg + + + mp4 + video/mp4 + + + mpa + audio/x-mpeg + + + mpe + video/mpeg + + + mpeg + video/mpeg + + + mpega + audio/x-mpeg + + + mpg + video/mpeg + + + mpv2 + video/mpeg2 + + + ms + application/x-wais-source + + + nc + application/x-netcdf + + + oda + application/oda + + + + odb + application/vnd.oasis.opendocument.database + + + + odc + application/vnd.oasis.opendocument.chart + + + + odf + application/vnd.oasis.opendocument.formula + + + + odg + application/vnd.oasis.opendocument.graphics + + + + odi + application/vnd.oasis.opendocument.image + + + + odm + application/vnd.oasis.opendocument.text-master + + + + odp + application/vnd.oasis.opendocument.presentation + + + + ods + application/vnd.oasis.opendocument.spreadsheet + + + + odt + application/vnd.oasis.opendocument.text + + + ogg + application/ogg + + + + otg + application/vnd.oasis.opendocument.graphics-template + + + + oth + application/vnd.oasis.opendocument.text-web + + + + otp + application/vnd.oasis.opendocument.presentation-template + + + + ots + application/vnd.oasis.opendocument.spreadsheet-template + + + + ott + application/vnd.oasis.opendocument.text-template + + + pbm + image/x-portable-bitmap + + + pct + image/pict + + + pdf + application/pdf + + + pgm + image/x-portable-graymap + + + pic + image/pict + + + pict + image/pict + + + pls + audio/x-scpls + + + png + image/png + + + pnm + image/x-portable-anymap + + + pnt + image/x-macpaint + + + ppm + image/x-portable-pixmap + + + ppt + application/vnd.ms-powerpoint + + + pps + application/vnd.ms-powerpoint + + + ps + application/postscript + + + psd + image/x-photoshop + + + qt + video/quicktime + + + qti + image/x-quicktime + + + qtif + image/x-quicktime + + + ras + image/x-cmu-raster + + + rdf + application/rdf+xml + + + rgb + image/x-rgb + + + rm + application/vnd.rn-realmedia + + + roff + application/x-troff + + + rtf + application/rtf + + + rtx + text/richtext + + + sh + application/x-sh + + + shar + application/x-shar + + + + smf + audio/x-midi + + + sit + application/x-stuffit + + + snd + audio/basic + + + src + application/x-wais-source + + + sv4cpio + application/x-sv4cpio + + + sv4crc + application/x-sv4crc + + + svg + image/svg+xml + + + svgz + image/svg+xml + + + swf + application/x-shockwave-flash + + + t + application/x-troff + + + tar + application/x-tar 
+ + + tcl + application/x-tcl + + + tex + application/x-tex + + + texi + application/x-texinfo + + + texinfo + application/x-texinfo + + + tif + image/tiff + + + tiff + image/tiff + + + tr + application/x-troff + + + tsv + text/tab-separated-values + + + txt + text/plain + + + ulw + audio/basic + + + ustar + application/x-ustar + + + vxml + application/voicexml+xml + + + xbm + image/x-xbitmap + + + xht + application/xhtml+xml + + + xhtml + application/xhtml+xml + + + xls + application/vnd.ms-excel + + + xml + application/xml + + + xpm + image/x-xpixmap + + + xsl + application/xml + + + xslt + application/xslt+xml + + + xul + application/vnd.mozilla.xul+xml + + + xwd + image/x-xwindowdump + + + vsd + application/x-visio + + + wav + audio/x-wav + + + + wbmp + image/vnd.wap.wbmp + + + + wml + text/vnd.wap.wml + + + + wmlc + application/vnd.wap.wmlc + + + + wmls + text/vnd.wap.wmlscript + + + + wmlscriptc + application/vnd.wap.wmlscriptc + + + wmv + video/x-ms-wmv + + + wrl + x-world/x-vrml + + + wspolicy + application/wspolicy+xml + + + Z + application/x-compress + + + z + application/x-compress + + + zip + application/zip + + + + + + + + + + + + + + + + index.html + index.htm + index.jsp + + + diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/annotations-api.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/annotations-api.jar new file mode 100644 index 0000000..8d5d8f9 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/annotations-api.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/catalina-ant.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/catalina-ant.jar new file mode 100644 index 0000000..4646554 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/catalina-ant.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/catalina-ha.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/catalina-ha.jar new file mode 100644 index 0000000..440291d Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/catalina-ha.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/catalina-tribes.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/catalina-tribes.jar new file mode 100644 index 0000000..7ea726f Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/catalina-tribes.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/catalina.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/catalina.jar new file mode 100644 index 0000000..f18c20d Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/catalina.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/ecj-3.7.2.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/ecj-3.7.2.jar new file mode 100644 index 0000000..54ae692 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/ecj-3.7.2.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/el-api.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/el-api.jar new file mode 100644 index 0000000..7503cda Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/el-api.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/jasper-el.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/jasper-el.jar new file mode 100644 index 0000000..c51e275 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/jasper-el.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/jasper.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/jasper.jar new file mode 100644 index 0000000..4728407 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/jasper.jar differ diff --git 
a/aarch64/share/hadoop/httpfs/tomcat/lib/jsp-api.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/jsp-api.jar new file mode 100644 index 0000000..3030459 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/jsp-api.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/servlet-api.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/servlet-api.jar new file mode 100644 index 0000000..44f490c Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/servlet-api.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-coyote.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-coyote.jar new file mode 100644 index 0000000..e0cb2be Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-coyote.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-dbcp.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-dbcp.jar new file mode 100644 index 0000000..8f85010 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-dbcp.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-i18n-es.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-i18n-es.jar new file mode 100644 index 0000000..3871d47 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-i18n-es.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-i18n-fr.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-i18n-fr.jar new file mode 100644 index 0000000..8991f2a Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-i18n-fr.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-i18n-ja.jar b/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-i18n-ja.jar new file mode 100644 index 0000000..6c7f787 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/lib/tomcat-i18n-ja.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/temp/safeToDelete.tmp b/aarch64/share/hadoop/httpfs/tomcat/temp/safeToDelete.tmp new file mode 100644 index 0000000..e69de29 diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/ROOT/WEB-INF/web.xml b/aarch64/share/hadoop/httpfs/tomcat/webapps/ROOT/WEB-INF/web.xml new file mode 100644 index 0000000..9d0ae0d --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/webapps/ROOT/WEB-INF/web.xml @@ -0,0 +1,16 @@ + + + + diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/ROOT/index.html b/aarch64/share/hadoop/httpfs/tomcat/webapps/ROOT/index.html new file mode 100644 index 0000000..2f9aa7a --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/webapps/ROOT/index.html @@ -0,0 +1,21 @@ + + + + +HttpFs service, service base URL at /webhdfs/v1. + + diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/default-log4j.properties b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/default-log4j.properties new file mode 100644 index 0000000..a0c6527 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/default-log4j.properties @@ -0,0 +1,20 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.Target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n +log4j.rootLogger=INFO, console + + diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/httpfs-default.xml b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/httpfs-default.xml new file mode 100644 index 0000000..87cd730 --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/httpfs-default.xml @@ -0,0 +1,237 @@ + + + + + + + + + httpfs.buffer.size + 4096 + + The buffer size used by a read/write request when streaming data from/to + HDFS. + + + + + + + httpfs.services + + org.apache.hadoop.lib.service.instrumentation.InstrumentationService, + org.apache.hadoop.lib.service.scheduler.SchedulerService, + org.apache.hadoop.lib.service.security.GroupsService, + org.apache.hadoop.lib.service.security.ProxyUserService, + org.apache.hadoop.lib.service.security.DelegationTokenManagerService, + org.apache.hadoop.lib.service.hadoop.FileSystemAccessService + + + Services used by the httpfs server. + + + + + + + kerberos.realm + LOCALHOST + + Kerberos realm, used only if Kerberos authentication is used between + the clients and httpfs or between HttpFS and HDFS. + + This property is only used to resolve other properties within this + configuration file. + + + + + + + httpfs.hostname + ${httpfs.http.hostname} + + Property used to synthetize the HTTP Kerberos principal used by httpfs. + + This property is only used to resolve other properties within this + configuration file. + + + + + httpfs.authentication.signature.secret.file + ${httpfs.config.dir}/httpfs-signature.secret + + File containing the secret to sign HttpFS hadoop-auth cookies. + + This file should be readable only by the system user running HttpFS service. + + If multiple HttpFS servers are used in a load-balancer/round-robin fashion, + they should share the secret file. + + + + + httpfs.authentication.type + simple + + Defines the authentication mechanism used by httpfs for its HTTP clients. + + Valid values are 'simple' or 'kerberos'. + + If using 'simple' HTTP clients must specify the username with the + 'user.name' query string parameter. + + If using 'kerberos' HTTP clients must use HTTP SPNEGO or delegation tokens. + + + + + httpfs.authentication.kerberos.principal + HTTP/${httpfs.hostname}@${kerberos.realm} + + The HTTP Kerberos principal used by HttpFS in the HTTP endpoint. + + The HTTP Kerberos principal MUST start with 'HTTP/' per Kerberos + HTTP SPNEGO specification. + + + + + httpfs.authentication.kerberos.keytab + ${user.home}/httpfs.keytab + + The Kerberos keytab file with the credentials for the + HTTP Kerberos principal used by httpfs in the HTTP endpoint. + + + + + + + httpfs.proxyuser.#USER#.hosts + * + + List of hosts the '#USER#' user is allowed to perform 'doAs' + operations. + + The '#USER#' must be replaced with the username o the user who is + allowed to perform 'doAs' operations. + + The value can be the '*' wildcard or a list of hostnames. + + For multiple users copy this property and replace the user name + in the property name. + + + + + httpfs.proxyuser.#USER#.groups + * + + List of groups the '#USER#' user is allowed to impersonate users + from to perform 'doAs' operations. 
+ + The '#USER#' must be replaced with the username o the user who is + allowed to perform 'doAs' operations. + + The value can be the '*' wildcard or a list of groups. + + For multiple users copy this property and replace the user name + in the property name. + + + + + + + httpfs.delegation.token.manager.update.interval + 86400 + + HttpFS delegation token update interval, default 1 day, in seconds. + + + + + httpfs.delegation.token.manager.max.lifetime + 604800 + + HttpFS delegation token maximum lifetime, default 7 days, in seconds + + + + + httpfs.delegation.token.manager.renewal.interval + 86400 + + HttpFS delegation token update interval, default 1 day, in seconds. + + + + + + + httpfs.hadoop.authentication.type + simple + + Defines the authentication mechanism used by httpfs to connect to + the HDFS Namenode. + + Valid values are 'simple' and 'kerberos'. + + + + + httpfs.hadoop.authentication.kerberos.keytab + ${user.home}/httpfs.keytab + + The Kerberos keytab file with the credentials for the + Kerberos principal used by httpfs to connect to the HDFS Namenode. + + + + + httpfs.hadoop.authentication.kerberos.principal + ${user.name}/${httpfs.hostname}@${kerberos.realm} + + The Kerberos principal used by httpfs to connect to the HDFS Namenode. + + + + + httpfs.hadoop.filesystem.cache.purge.frequency + 60 + + Frequency, in seconds, for the idle filesystem purging daemon runs. + + + + + httpfs.hadoop.filesystem.cache.purge.timeout + 60 + + Timeout, in seconds, for an idle filesystem to be purged. + + + + + httpfs.user.provider.user.pattern + ^[A-Za-z_][A-Za-z0-9._-]*[$]?$ + + Valid pattern for user and group names, it must be a valid java regex. + + + + diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/httpfs.properties b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/httpfs.properties new file mode 100644 index 0000000..7a1950b --- /dev/null +++ b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/httpfs.properties @@ -0,0 +1,21 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
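The httpfs-default.xml above documents that, with the default 'simple' authentication type, HTTP clients identify themselves through the user.name query-string parameter, and that impersonation is governed by the httpfs.proxyuser.#USER#.* properties. A minimal smoke test of a running HttpFS instance might look like the following sketch; the hostname, the port (14000 is the conventional HttpFS default) and the 'hdfs' username are assumptions, not values fixed by this patch.

    # Illustrative only: exercise the WebHDFS-compatible REST API exposed
    # by HttpFS at /webhdfs/v1 (see ROOT/index.html earlier in this patch).
    HTTPFS=http://localhost:14000

    # 'simple' auth: pass the caller identity via user.name, as described
    # in httpfs-default.xml above.
    curl -s "$HTTPFS/webhdfs/v1/?op=GETHOMEDIRECTORY&user.name=hdfs"

    # List a directory through the same API.
    curl -s "$HTTPFS/webhdfs/v1/tmp?op=LISTSTATUS&user.name=hdfs"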
+# + +httpfs.version=2.2.0 + +httpfs.source.repository=REPO NOT AVAIL +httpfs.source.revision=REVISION NOT AVAIL + +httpfs.build.username=aim +httpfs.build.timestamp=2014-02-12T09:43:09+0000 diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$1.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$1.class new file mode 100644 index 0000000..49abc0c Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$1.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$2.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$2.class new file mode 100644 index 0000000..9989a85 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$2.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$3.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$3.class new file mode 100644 index 0000000..af936a1 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$3.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$4.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$4.class new file mode 100644 index 0000000..1d84673 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$4.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$5.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$5.class new file mode 100644 index 0000000..f5cb24f Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$5.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$6.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$6.class new file mode 100644 index 0000000..e974769 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$6.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$FILE_TYPE.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$FILE_TYPE.class new file mode 100644 index 0000000..b976dbc Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$FILE_TYPE.class differ diff --git 
a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$HttpFSDataInputStream.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$HttpFSDataInputStream.class new file mode 100644 index 0000000..4adcb6a Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$HttpFSDataInputStream.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$HttpFSDataOutputStream.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$HttpFSDataOutputStream.class new file mode 100644 index 0000000..dfb08d9 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$HttpFSDataOutputStream.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$Operation.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$Operation.class new file mode 100644 index 0000000..aa5611a Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem$Operation.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem.class new file mode 100644 index 0000000..ea990be Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSFileSystem.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSKerberosAuthenticator$DelegationTokenOperation.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSKerberosAuthenticator$DelegationTokenOperation.class new file mode 100644 index 0000000..b689700 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSKerberosAuthenticator$DelegationTokenOperation.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSKerberosAuthenticator.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSKerberosAuthenticator.class new file mode 100644 index 0000000..269cc20 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSKerberosAuthenticator.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSPseudoAuthenticator.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSPseudoAuthenticator.class new file mode 100644 index 0000000..ac5cf2c Binary files /dev/null and 
b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSPseudoAuthenticator.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSUtils.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSUtils.class new file mode 100644 index 0000000..c9c32ca Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/client/HttpFSUtils.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/CheckUploadContentTypeFilter.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/CheckUploadContentTypeFilter.class new file mode 100644 index 0000000..740000b Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/CheckUploadContentTypeFilter.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSAppend.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSAppend.class new file mode 100644 index 0000000..d7c57ad Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSAppend.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSConcat.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSConcat.class new file mode 100644 index 0000000..327e9da Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSConcat.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSContentSummary.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSContentSummary.class new file mode 100644 index 0000000..1b3cbbf Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSContentSummary.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSCreate.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSCreate.class new file mode 100644 index 0000000..7b01b41 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSCreate.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSDelete.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSDelete.class new file mode 100644 index 0000000..6e2bfe7 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSDelete.class differ diff --git 
a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSFileChecksum.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSFileChecksum.class new file mode 100644 index 0000000..6c69e93 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSFileChecksum.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSFileStatus.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSFileStatus.class new file mode 100644 index 0000000..750e2e0 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSFileStatus.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSHomeDir.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSHomeDir.class new file mode 100644 index 0000000..dde4e76 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSHomeDir.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSListStatus.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSListStatus.class new file mode 100644 index 0000000..6de5623 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSListStatus.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSMkdirs.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSMkdirs.class new file mode 100644 index 0000000..4d74c68 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSMkdirs.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSOpen.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSOpen.class new file mode 100644 index 0000000..57921ff Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSOpen.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSRename.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSRename.class new file mode 100644 index 0000000..5c6a7c9 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSRename.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetOwner.class 
b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetOwner.class new file mode 100644 index 0000000..ac33241 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetOwner.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetPermission.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetPermission.class new file mode 100644 index 0000000..58cba4f Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetPermission.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetReplication.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetReplication.class new file mode 100644 index 0000000..5247ff7 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetReplication.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetTimes.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetTimes.class new file mode 100644 index 0000000..8165ce1 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations$FSSetTimes.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations.class new file mode 100644 index 0000000..8c482ac Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/FSOperations.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSAuthenticationFilter.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSAuthenticationFilter.class new file mode 100644 index 0000000..c74583a Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSAuthenticationFilter.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSExceptionProvider.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSExceptionProvider.class new file mode 100644 index 0000000..8ac59d0 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSExceptionProvider.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSKerberosAuthenticationHandler$1.class 
b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSKerberosAuthenticationHandler$1.class new file mode 100644 index 0000000..7d52614 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSKerberosAuthenticationHandler$1.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSKerberosAuthenticationHandler.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSKerberosAuthenticationHandler.class new file mode 100644 index 0000000..0a6ac29 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSKerberosAuthenticationHandler.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$AccessTimeParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$AccessTimeParam.class new file mode 100644 index 0000000..9041b77 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$AccessTimeParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$BlockSizeParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$BlockSizeParam.class new file mode 100644 index 0000000..6d96d40 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$BlockSizeParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$DataParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$DataParam.class new file mode 100644 index 0000000..7be8cfe Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$DataParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$DestinationParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$DestinationParam.class new file mode 100644 index 0000000..80efa24 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$DestinationParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$DoAsParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$DoAsParam.class new file mode 100644 index 0000000..85b1be8 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$DoAsParam.class differ diff --git 
a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$FilterParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$FilterParam.class new file mode 100644 index 0000000..9ad3d25 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$FilterParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$GroupParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$GroupParam.class new file mode 100644 index 0000000..c336620 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$GroupParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$LenParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$LenParam.class new file mode 100644 index 0000000..2cb4327 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$LenParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$ModifiedTimeParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$ModifiedTimeParam.class new file mode 100644 index 0000000..ff5ca61 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$ModifiedTimeParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OffsetParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OffsetParam.class new file mode 100644 index 0000000..bb9ad38 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OffsetParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OperationParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OperationParam.class new file mode 100644 index 0000000..0361a57 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OperationParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OverwriteParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OverwriteParam.class new file mode 100644 index 0000000..1123645 Binary files /dev/null and 
b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OverwriteParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OwnerParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OwnerParam.class new file mode 100644 index 0000000..fe2d7f6 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$OwnerParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$PermissionParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$PermissionParam.class new file mode 100644 index 0000000..7d44c19 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$PermissionParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$RecursiveParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$RecursiveParam.class new file mode 100644 index 0000000..d8140e8 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$RecursiveParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$ReplicationParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$ReplicationParam.class new file mode 100644 index 0000000..8e74641 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$ReplicationParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$SourcesParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$SourcesParam.class new file mode 100644 index 0000000..d97c534 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider$SourcesParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider.class new file mode 100644 index 0000000..93058c9 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSParametersProvider.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSReleaseFilter.class 
b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSReleaseFilter.class new file mode 100644 index 0000000..9e97913 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSReleaseFilter.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSServer$1.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSServer$1.class new file mode 100644 index 0000000..0338f6d Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSServer$1.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSServer.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSServer.class new file mode 100644 index 0000000..80698a9 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSServer.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSServerWebApp.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSServerWebApp.class new file mode 100644 index 0000000..50fbef5 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/fs/http/server/HttpFSServerWebApp.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/lang/RunnableCallable.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/lang/RunnableCallable.class new file mode 100644 index 0000000..d4c9d68 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/lang/RunnableCallable.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/lang/XException$ERROR.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/lang/XException$ERROR.class new file mode 100644 index 0000000..17ab56d Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/lang/XException$ERROR.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/lang/XException.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/lang/XException.class new file mode 100644 index 0000000..c9d7305 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/lang/XException.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/BaseService.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/BaseService.class new file mode 100644 index 0000000..e27aa3c Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/BaseService.class differ diff --git 
a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/Server$Status.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/Server$Status.class new file mode 100644 index 0000000..05c4788 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/Server$Status.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/Server.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/Server.class new file mode 100644 index 0000000..a6962e2 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/Server.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/ServerException$ERROR.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/ServerException$ERROR.class new file mode 100644 index 0000000..77be81c Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/ServerException$ERROR.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/ServerException.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/ServerException.class new file mode 100644 index 0000000..f2393ca Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/ServerException.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/Service.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/Service.class new file mode 100644 index 0000000..df904d0 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/Service.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/ServiceException.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/ServiceException.class new file mode 100644 index 0000000..4708dba Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/server/ServiceException.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenIdentifier.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenIdentifier.class new file mode 100644 index 0000000..4eb511b Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenIdentifier.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenManager.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenManager.class new file mode 100644 index 0000000..715cab3 Binary files /dev/null and 
b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenManager.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenManagerException$ERROR.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenManagerException$ERROR.class new file mode 100644 index 0000000..5ccfff8 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenManagerException$ERROR.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenManagerException.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenManagerException.class new file mode 100644 index 0000000..5992d5a Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/DelegationTokenManagerException.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccess$FileSystemExecutor.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccess$FileSystemExecutor.class new file mode 100644 index 0000000..cba6a24 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccess$FileSystemExecutor.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccess.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccess.class new file mode 100644 index 0000000..1c60b44 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccess.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccessException$ERROR.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccessException$ERROR.class new file mode 100644 index 0000000..d14e92e Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccessException$ERROR.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccessException.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccessException.class new file mode 100644 index 0000000..636ad09 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/FileSystemAccessException.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Groups.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Groups.class new file mode 100644 index 0000000..3eb303a Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Groups.class differ diff --git 
a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Instrumentation$Cron.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Instrumentation$Cron.class new file mode 100644 index 0000000..fa84e4a Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Instrumentation$Cron.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Instrumentation$Variable.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Instrumentation$Variable.class new file mode 100644 index 0000000..2bc1179 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Instrumentation$Variable.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Instrumentation.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Instrumentation.class new file mode 100644 index 0000000..862cecc Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Instrumentation.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/ProxyUser.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/ProxyUser.class new file mode 100644 index 0000000..7cfc8b4 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/ProxyUser.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Scheduler.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Scheduler.class new file mode 100644 index 0000000..1861e56 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/Scheduler.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$1.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$1.class new file mode 100644 index 0000000..dc35f9c Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$1.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$2.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$2.class new file mode 100644 index 0000000..6ae37e2 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$2.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$3.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$3.class new 
file mode 100644 index 0000000..8045c21 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$3.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$4.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$4.class new file mode 100644 index 0000000..285d621 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$4.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$CachedFileSystem.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$CachedFileSystem.class new file mode 100644 index 0000000..67dfe52 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$CachedFileSystem.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$FileSystemCachePurger.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$FileSystemCachePurger.class new file mode 100644 index 0000000..2bd2a59 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService$FileSystemCachePurger.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService.class new file mode 100644 index 0000000..995ab23 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$1.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$1.class new file mode 100644 index 0000000..dbc269c Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$1.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$2.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$2.class new file mode 100644 index 0000000..5665910 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$2.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$3.class 
b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$3.class new file mode 100644 index 0000000..702e047 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$3.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$Cron.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$Cron.class new file mode 100644 index 0000000..2d17a5e Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$Cron.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$Sampler.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$Sampler.class new file mode 100644 index 0000000..e4afcd3 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$Sampler.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$SamplersRunnable.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$SamplersRunnable.class new file mode 100644 index 0000000..b29cd59 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$SamplersRunnable.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$Timer.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$Timer.class new file mode 100644 index 0000000..00cf1a7 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$Timer.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$VariableHolder.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$VariableHolder.class new file mode 100644 index 0000000..a6eaea4 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService$VariableHolder.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService.class new file mode 100644 index 0000000..a924f2c Binary files /dev/null and 
b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/instrumentation/InstrumentationService.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/scheduler/SchedulerService$1.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/scheduler/SchedulerService$1.class new file mode 100644 index 0000000..9b2aaf7 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/scheduler/SchedulerService$1.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/scheduler/SchedulerService.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/scheduler/SchedulerService.class new file mode 100644 index 0000000..d38af80 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/scheduler/SchedulerService.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/DelegationTokenManagerService$DelegationTokenSecretManager.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/DelegationTokenManagerService$DelegationTokenSecretManager.class new file mode 100644 index 0000000..5f923ec Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/DelegationTokenManagerService$DelegationTokenSecretManager.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/DelegationTokenManagerService.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/DelegationTokenManagerService.class new file mode 100644 index 0000000..7b7b490 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/DelegationTokenManagerService.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/GroupsService.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/GroupsService.class new file mode 100644 index 0000000..3d86f54 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/GroupsService.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/ProxyUserService$ERROR.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/ProxyUserService$ERROR.class new file mode 100644 index 0000000..a6fb583 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/ProxyUserService$ERROR.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/ProxyUserService.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/ProxyUserService.class new file mode 100644 index 0000000..19afd8b Binary 
files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/service/security/ProxyUserService.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/FileSystemReleaseFilter.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/FileSystemReleaseFilter.class new file mode 100644 index 0000000..4039a6c Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/FileSystemReleaseFilter.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/HostnameFilter.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/HostnameFilter.class new file mode 100644 index 0000000..678775d Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/HostnameFilter.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/MDCFilter.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/MDCFilter.class new file mode 100644 index 0000000..25afd60 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/MDCFilter.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/ServerWebApp.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/ServerWebApp.class new file mode 100644 index 0000000..e3c7404 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/servlet/ServerWebApp.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/util/Check.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/util/Check.class new file mode 100644 index 0000000..7932eb5 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/util/Check.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/util/ConfigurationUtils.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/util/ConfigurationUtils.class new file mode 100644 index 0000000..f8fdf87 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/util/ConfigurationUtils.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/BooleanParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/BooleanParam.class new file mode 100644 index 0000000..0619b81 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/BooleanParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ByteParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ByteParam.class new file mode 100644 index 0000000..a86448b Binary files 
/dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ByteParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/EnumParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/EnumParam.class new file mode 100644 index 0000000..99f1eaf Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/EnumParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ExceptionProvider.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ExceptionProvider.class new file mode 100644 index 0000000..b0c2693 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ExceptionProvider.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/InputStreamEntity.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/InputStreamEntity.class new file mode 100644 index 0000000..34eb9ca Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/InputStreamEntity.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/IntegerParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/IntegerParam.class new file mode 100644 index 0000000..0a0ec02 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/IntegerParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/JSONMapProvider.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/JSONMapProvider.class new file mode 100644 index 0000000..f6a1c40 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/JSONMapProvider.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/JSONProvider.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/JSONProvider.class new file mode 100644 index 0000000..248f53b Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/JSONProvider.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/LongParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/LongParam.class new file mode 100644 index 0000000..f5a6ba3 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/LongParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/Param.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/Param.class new file mode 100644 index 0000000..c5d40f4 Binary files /dev/null and 
b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/Param.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/Parameters.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/Parameters.class new file mode 100644 index 0000000..1d5fb1f Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/Parameters.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ParametersProvider.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ParametersProvider.class new file mode 100644 index 0000000..03d61a1 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ParametersProvider.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ShortParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ShortParam.class new file mode 100644 index 0000000..386c4d9 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/ShortParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/StringParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/StringParam.class new file mode 100644 index 0000000..4aeda70 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/StringParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/UserProvider$1.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/UserProvider$1.class new file mode 100644 index 0000000..aceb693 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/UserProvider$1.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/UserProvider$UserParam.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/UserProvider$UserParam.class new file mode 100644 index 0000000..8d2a019 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/UserProvider$UserParam.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/UserProvider.class b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/UserProvider.class new file mode 100644 index 0000000..a85e24b Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/classes/org/apache/hadoop/lib/wsrs/UserProvider.class differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/activation-1.1.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/activation-1.1.jar new file mode 100644 index 0000000..53f82a1 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/activation-1.1.jar differ diff --git 
a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/asm-3.2.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/asm-3.2.jar new file mode 100644 index 0000000..ca9f8d2 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/asm-3.2.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/avro-1.7.4.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/avro-1.7.4.jar new file mode 100644 index 0000000..69dd87d Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/avro-1.7.4.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-beanutils-1.7.0.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-beanutils-1.7.0.jar new file mode 100644 index 0000000..b1b89c9 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-beanutils-1.7.0.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-beanutils-core-1.8.0.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-beanutils-core-1.8.0.jar new file mode 100644 index 0000000..87c15f4 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-beanutils-core-1.8.0.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-cli-1.2.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-cli-1.2.jar new file mode 100644 index 0000000..ce4b9ff Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-cli-1.2.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-codec-1.4.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-codec-1.4.jar new file mode 100644 index 0000000..458d432 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-codec-1.4.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-collections-3.2.1.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-collections-3.2.1.jar new file mode 100644 index 0000000..c35fa1f Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-collections-3.2.1.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-compress-1.4.1.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-compress-1.4.1.jar new file mode 100644 index 0000000..b58761e Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-compress-1.4.1.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-configuration-1.6.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-configuration-1.6.jar new file mode 100644 index 0000000..2d4689a Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-configuration-1.6.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-daemon-1.0.13.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-daemon-1.0.13.jar new file mode 100644 index 0000000..ac77321 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-daemon-1.0.13.jar 
differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-digester-1.8.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-digester-1.8.jar new file mode 100644 index 0000000..1110f0a Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-digester-1.8.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-io-2.1.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-io-2.1.jar new file mode 100644 index 0000000..b5c7d69 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-io-2.1.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-lang-2.5.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-lang-2.5.jar new file mode 100644 index 0000000..ae491da Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-lang-2.5.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-logging-1.1.1.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-logging-1.1.1.jar new file mode 100644 index 0000000..1deef14 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-logging-1.1.1.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-math-2.1.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-math-2.1.jar new file mode 100644 index 0000000..43b4b36 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-math-2.1.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-net-3.1.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-net-3.1.jar new file mode 100644 index 0000000..b75f1a5 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/commons-net-3.1.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/guava-11.0.2.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/guava-11.0.2.jar new file mode 100644 index 0000000..c8c8d5d Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/guava-11.0.2.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-annotations-2.2.0.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-annotations-2.2.0.jar new file mode 100644 index 0000000..c2d4dc1 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-annotations-2.2.0.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-auth-2.2.0.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-auth-2.2.0.jar new file mode 100644 index 0000000..0787af1 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-auth-2.2.0.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-common-2.2.0.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-common-2.2.0.jar new file mode 100644 index 0000000..5fb45d8 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-common-2.2.0.jar differ diff --git 
a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-hdfs-2.2.0.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-hdfs-2.2.0.jar new file mode 100644 index 0000000..95c8b8c Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/hadoop-hdfs-2.2.0.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-core-asl-1.8.8.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-core-asl-1.8.8.jar new file mode 100644 index 0000000..05f3353 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-core-asl-1.8.8.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-jaxrs-1.8.8.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-jaxrs-1.8.8.jar new file mode 100644 index 0000000..21b31c2 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-jaxrs-1.8.8.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-mapper-asl-1.8.8.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-mapper-asl-1.8.8.jar new file mode 100644 index 0000000..7c7cd21 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-mapper-asl-1.8.8.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-xc-1.8.8.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-xc-1.8.8.jar new file mode 100644 index 0000000..ebfbf41 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jackson-xc-1.8.8.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jaxb-api-2.2.2.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jaxb-api-2.2.2.jar new file mode 100644 index 0000000..31e5fa0 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jaxb-api-2.2.2.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jaxb-impl-2.2.3-1.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jaxb-impl-2.2.3-1.jar new file mode 100644 index 0000000..eeaf660 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jaxb-impl-2.2.3-1.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jersey-core-1.9.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jersey-core-1.9.jar new file mode 100644 index 0000000..548dd88 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jersey-core-1.9.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jersey-json-1.9.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jersey-json-1.9.jar new file mode 100644 index 0000000..b1a4ce5 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jersey-json-1.9.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jersey-server-1.9.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jersey-server-1.9.jar new file mode 100644 index 0000000..ae0117c Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jersey-server-1.9.jar differ diff --git 
a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jettison-1.1.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jettison-1.1.jar new file mode 100644 index 0000000..e4e9c8c Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jettison-1.1.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jsch-0.1.42.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jsch-0.1.42.jar new file mode 100644 index 0000000..c65eff0 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jsch-0.1.42.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/json-simple-1.1.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/json-simple-1.1.jar new file mode 100644 index 0000000..f395f41 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/json-simple-1.1.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jsr305-1.3.9.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jsr305-1.3.9.jar new file mode 100644 index 0000000..a9afc66 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jsr305-1.3.9.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/log4j-1.2.17.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/log4j-1.2.17.jar new file mode 100644 index 0000000..1d425cf Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/log4j-1.2.17.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/paranamer-2.3.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/paranamer-2.3.jar new file mode 100644 index 0000000..ad12ae9 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/paranamer-2.3.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/protobuf-java-2.5.0.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/protobuf-java-2.5.0.jar new file mode 100644 index 0000000..4c4e686 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/protobuf-java-2.5.0.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/slf4j-api-1.7.5.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/slf4j-api-1.7.5.jar new file mode 100644 index 0000000..8f004d3 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/slf4j-api-1.7.5.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/slf4j-log4j12-1.7.5.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/slf4j-log4j12-1.7.5.jar new file mode 100644 index 0000000..f5298b5 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/slf4j-log4j12-1.7.5.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/snappy-java-1.0.4.1.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/snappy-java-1.0.4.1.jar new file mode 100644 index 0000000..8198919 Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/snappy-java-1.0.4.1.jar differ diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/stax-api-1.0.1.jar 
b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/stax-api-1.0.1.jar
new file mode 100644
index 0000000..d9a1665
Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/stax-api-1.0.1.jar differ
diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/xmlenc-0.52.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/xmlenc-0.52.jar
new file mode 100644
index 0000000..ec568b4
Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/xmlenc-0.52.jar differ
diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/xz-1.0.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/xz-1.0.jar
new file mode 100644
index 0000000..a848f16
Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/xz-1.0.jar differ
diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/zookeeper-3.4.5.jar b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/zookeeper-3.4.5.jar
new file mode 100644
index 0000000..a7966bb
Binary files /dev/null and b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/zookeeper-3.4.5.jar differ
diff --git a/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/web.xml b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/web.xml
new file mode 100644
index 0000000..4c0b3ae
--- /dev/null
+++ b/aarch64/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/web.xml
@@ -0,0 +1,98 @@
+
+
+
+
+
+ org.apache.hadoop.fs.http.server.HttpFSServerWebApp
+
+
+
+ webservices-driver
+ com.sun.jersey.spi.container.servlet.ServletContainer
+
+ com.sun.jersey.config.property.packages
+ org.apache.hadoop.fs.http.server,org.apache.hadoop.lib.wsrs
+
+
+
+
+ 1
+
+
+
+ webservices-driver
+ /*
+
+
+
+ authFilter
+ org.apache.hadoop.fs.http.server.HttpFSAuthenticationFilter
+
+
+
+ MDCFilter
+ org.apache.hadoop.lib.servlet.MDCFilter
+
+
+
+ hostnameFilter
+ org.apache.hadoop.lib.servlet.HostnameFilter
+
+
+
+ checkUploadContentType
+ org.apache.hadoop.fs.http.server.CheckUploadContentTypeFilter
+
+
+
+ fsReleaseFilter
+ org.apache.hadoop.fs.http.server.HttpFSReleaseFilter
+
+
+
+ authFilter
+ *
+
+
+
+ MDCFilter
+ *
+
+
+
+ hostnameFilter
+ *
+
+
+
+ checkUploadContentType
+ *
+
+
+
+ fsReleaseFilter
+ *
+
+
+
diff --git a/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-app-2.2.0.jar b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-app-2.2.0.jar
new file mode 100644
index 0000000..1779bda
Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-app-2.2.0.jar differ
diff --git a/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.2.0.jar b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.2.0.jar
new file mode 100644
index 0000000..97020a0
Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.2.0.jar differ
diff --git a/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.2.0.jar b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.2.0.jar
new file mode 100644
index 0000000..2ea7bdb
Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.2.0.jar differ
diff --git a/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-2.2.0.jar b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-2.2.0.jar
new file mode 100644
index 0000000..d209dcf
Binary files /dev/null and
b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-2.2.0.jar differ diff --git a/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-plugins-2.2.0.jar b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-plugins-2.2.0.jar new file mode 100644 index 0000000..752c466 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-plugins-2.2.0.jar differ diff --git a/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.2.0-tests.jar b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.2.0-tests.jar new file mode 100644 index 0000000..8033098 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.2.0-tests.jar differ diff --git a/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.2.0.jar b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.2.0.jar new file mode 100644 index 0000000..84beea4 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.2.0.jar differ diff --git a/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-shuffle-2.2.0.jar b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-shuffle-2.2.0.jar new file mode 100644 index 0000000..1644758 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-client-shuffle-2.2.0.jar differ diff --git a/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar new file mode 100644 index 0000000..b0391e2 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib-examples/hsqldb-2.0.0.jar b/aarch64/share/hadoop/mapreduce/lib-examples/hsqldb-2.0.0.jar new file mode 100644 index 0000000..2da29ed Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib-examples/hsqldb-2.0.0.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/aopalliance-1.0.jar b/aarch64/share/hadoop/mapreduce/lib/aopalliance-1.0.jar new file mode 100644 index 0000000..578b1a0 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/aopalliance-1.0.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/asm-3.2.jar b/aarch64/share/hadoop/mapreduce/lib/asm-3.2.jar new file mode 100644 index 0000000..ca9f8d2 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/asm-3.2.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/avro-1.7.4.jar b/aarch64/share/hadoop/mapreduce/lib/avro-1.7.4.jar new file mode 100644 index 0000000..69dd87d Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/avro-1.7.4.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/commons-compress-1.4.1.jar b/aarch64/share/hadoop/mapreduce/lib/commons-compress-1.4.1.jar new file mode 100644 index 0000000..b58761e Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/commons-compress-1.4.1.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/commons-io-2.1.jar b/aarch64/share/hadoop/mapreduce/lib/commons-io-2.1.jar new file mode 100644 index 0000000..b5c7d69 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/commons-io-2.1.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/guice-3.0.jar b/aarch64/share/hadoop/mapreduce/lib/guice-3.0.jar new file mode 100644 index 0000000..f313e2b Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/guice-3.0.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/guice-servlet-3.0.jar 
b/aarch64/share/hadoop/mapreduce/lib/guice-servlet-3.0.jar new file mode 100644 index 0000000..bdc6614 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/guice-servlet-3.0.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/hadoop-annotations-2.2.0.jar b/aarch64/share/hadoop/mapreduce/lib/hadoop-annotations-2.2.0.jar new file mode 100644 index 0000000..c2d4dc1 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/hadoop-annotations-2.2.0.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/hamcrest-core-1.1.jar b/aarch64/share/hadoop/mapreduce/lib/hamcrest-core-1.1.jar new file mode 100644 index 0000000..e5149be Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/hamcrest-core-1.1.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/jackson-core-asl-1.8.8.jar b/aarch64/share/hadoop/mapreduce/lib/jackson-core-asl-1.8.8.jar new file mode 100644 index 0000000..05f3353 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/jackson-core-asl-1.8.8.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/jackson-mapper-asl-1.8.8.jar b/aarch64/share/hadoop/mapreduce/lib/jackson-mapper-asl-1.8.8.jar new file mode 100644 index 0000000..7c7cd21 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/jackson-mapper-asl-1.8.8.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/javax.inject-1.jar b/aarch64/share/hadoop/mapreduce/lib/javax.inject-1.jar new file mode 100644 index 0000000..b2a9d0b Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/javax.inject-1.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/jersey-core-1.9.jar b/aarch64/share/hadoop/mapreduce/lib/jersey-core-1.9.jar new file mode 100644 index 0000000..548dd88 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/jersey-core-1.9.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/jersey-guice-1.9.jar b/aarch64/share/hadoop/mapreduce/lib/jersey-guice-1.9.jar new file mode 100644 index 0000000..cb46c94 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/jersey-guice-1.9.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/jersey-server-1.9.jar b/aarch64/share/hadoop/mapreduce/lib/jersey-server-1.9.jar new file mode 100644 index 0000000..ae0117c Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/jersey-server-1.9.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/junit-4.10.jar b/aarch64/share/hadoop/mapreduce/lib/junit-4.10.jar new file mode 100644 index 0000000..954851e Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/junit-4.10.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/log4j-1.2.17.jar b/aarch64/share/hadoop/mapreduce/lib/log4j-1.2.17.jar new file mode 100644 index 0000000..1d425cf Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/log4j-1.2.17.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/netty-3.6.2.Final.jar b/aarch64/share/hadoop/mapreduce/lib/netty-3.6.2.Final.jar new file mode 100644 index 0000000..a421e28 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/netty-3.6.2.Final.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/paranamer-2.3.jar b/aarch64/share/hadoop/mapreduce/lib/paranamer-2.3.jar new file mode 100644 index 0000000..ad12ae9 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/paranamer-2.3.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/protobuf-java-2.5.0.jar b/aarch64/share/hadoop/mapreduce/lib/protobuf-java-2.5.0.jar new file mode 100644 index 
0000000..4c4e686 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/protobuf-java-2.5.0.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/snappy-java-1.0.4.1.jar b/aarch64/share/hadoop/mapreduce/lib/snappy-java-1.0.4.1.jar new file mode 100644 index 0000000..8198919 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/snappy-java-1.0.4.1.jar differ diff --git a/aarch64/share/hadoop/mapreduce/lib/xz-1.0.jar b/aarch64/share/hadoop/mapreduce/lib/xz-1.0.jar new file mode 100644 index 0000000..a848f16 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/lib/xz-1.0.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-app-2.2.0-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-app-2.2.0-sources.jar new file mode 100644 index 0000000..e0d6fbe Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-app-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-app-2.2.0-test-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-app-2.2.0-test-sources.jar new file mode 100644 index 0000000..3bb1e4f Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-app-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-common-2.2.0-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-common-2.2.0-sources.jar new file mode 100644 index 0000000..d68ecff Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-common-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-common-2.2.0-test-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-common-2.2.0-test-sources.jar new file mode 100644 index 0000000..d31a4c4 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-common-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-core-2.2.0-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-core-2.2.0-sources.jar new file mode 100644 index 0000000..68e2f35 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-core-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-core-2.2.0-test-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-core-2.2.0-test-sources.jar new file mode 100644 index 0000000..c5c857e Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-core-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-2.2.0-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-2.2.0-sources.jar new file mode 100644 index 0000000..d62834f Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-2.2.0-test-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-2.2.0-test-sources.jar new file mode 100644 index 0000000..e8eafca Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-2.2.0-test-sources.jar differ diff --git 
a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-plugins-2.2.0-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-plugins-2.2.0-sources.jar new file mode 100644 index 0000000..0d769bd Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-plugins-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-plugins-2.2.0-test-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-plugins-2.2.0-test-sources.jar new file mode 100644 index 0000000..21065a3 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-hs-plugins-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-jobclient-2.2.0-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-jobclient-2.2.0-sources.jar new file mode 100644 index 0000000..3345dab Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-jobclient-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-jobclient-2.2.0-test-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-jobclient-2.2.0-test-sources.jar new file mode 100644 index 0000000..5668189 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-jobclient-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-shuffle-2.2.0-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-shuffle-2.2.0-sources.jar new file mode 100644 index 0000000..fabc420 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-shuffle-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-shuffle-2.2.0-test-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-shuffle-2.2.0-test-sources.jar new file mode 100644 index 0000000..6108b4c Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-client-shuffle-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-examples-2.2.0-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-examples-2.2.0-sources.jar new file mode 100644 index 0000000..8dc4532 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-examples-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-examples-2.2.0-test-sources.jar b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-examples-2.2.0-test-sources.jar new file mode 100644 index 0000000..3ac0ee5 Binary files /dev/null and b/aarch64/share/hadoop/mapreduce/sources/hadoop-mapreduce-examples-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/tools/lib/hadoop-archives-2.2.0.jar b/aarch64/share/hadoop/tools/lib/hadoop-archives-2.2.0.jar new file mode 100644 index 0000000..83591ec Binary files /dev/null and b/aarch64/share/hadoop/tools/lib/hadoop-archives-2.2.0.jar differ diff --git a/aarch64/share/hadoop/tools/lib/hadoop-datajoin-2.2.0.jar b/aarch64/share/hadoop/tools/lib/hadoop-datajoin-2.2.0.jar new file mode 100644 index 0000000..a1aa9bd Binary files /dev/null and b/aarch64/share/hadoop/tools/lib/hadoop-datajoin-2.2.0.jar differ diff --git a/aarch64/share/hadoop/tools/lib/hadoop-distcp-2.2.0.jar 
b/aarch64/share/hadoop/tools/lib/hadoop-distcp-2.2.0.jar new file mode 100644 index 0000000..7bf7500 Binary files /dev/null and b/aarch64/share/hadoop/tools/lib/hadoop-distcp-2.2.0.jar differ diff --git a/aarch64/share/hadoop/tools/lib/hadoop-extras-2.2.0.jar b/aarch64/share/hadoop/tools/lib/hadoop-extras-2.2.0.jar new file mode 100644 index 0000000..6a0948e Binary files /dev/null and b/aarch64/share/hadoop/tools/lib/hadoop-extras-2.2.0.jar differ diff --git a/aarch64/share/hadoop/tools/lib/hadoop-gridmix-2.2.0.jar b/aarch64/share/hadoop/tools/lib/hadoop-gridmix-2.2.0.jar new file mode 100644 index 0000000..cb6e47b Binary files /dev/null and b/aarch64/share/hadoop/tools/lib/hadoop-gridmix-2.2.0.jar differ diff --git a/aarch64/share/hadoop/tools/lib/hadoop-rumen-2.2.0.jar b/aarch64/share/hadoop/tools/lib/hadoop-rumen-2.2.0.jar new file mode 100644 index 0000000..60c69e0 Binary files /dev/null and b/aarch64/share/hadoop/tools/lib/hadoop-rumen-2.2.0.jar differ diff --git a/aarch64/share/hadoop/tools/lib/hadoop-streaming-2.2.0.jar b/aarch64/share/hadoop/tools/lib/hadoop-streaming-2.2.0.jar new file mode 100644 index 0000000..0473d26 Binary files /dev/null and b/aarch64/share/hadoop/tools/lib/hadoop-streaming-2.2.0.jar differ diff --git a/aarch64/share/hadoop/tools/sources/hadoop-archives-2.2.0-sources.jar b/aarch64/share/hadoop/tools/sources/hadoop-archives-2.2.0-sources.jar new file mode 100644 index 0000000..5656591 Binary files /dev/null and b/aarch64/share/hadoop/tools/sources/hadoop-archives-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/tools/sources/hadoop-archives-2.2.0-test-sources.jar b/aarch64/share/hadoop/tools/sources/hadoop-archives-2.2.0-test-sources.jar new file mode 100644 index 0000000..96bc9b6 Binary files /dev/null and b/aarch64/share/hadoop/tools/sources/hadoop-archives-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/tools/sources/hadoop-datajoin-2.2.0-sources.jar b/aarch64/share/hadoop/tools/sources/hadoop-datajoin-2.2.0-sources.jar new file mode 100644 index 0000000..87e346f Binary files /dev/null and b/aarch64/share/hadoop/tools/sources/hadoop-datajoin-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/tools/sources/hadoop-datajoin-2.2.0-test-sources.jar b/aarch64/share/hadoop/tools/sources/hadoop-datajoin-2.2.0-test-sources.jar new file mode 100644 index 0000000..5593ff3 Binary files /dev/null and b/aarch64/share/hadoop/tools/sources/hadoop-datajoin-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/tools/sources/hadoop-distcp-2.2.0-sources.jar b/aarch64/share/hadoop/tools/sources/hadoop-distcp-2.2.0-sources.jar new file mode 100644 index 0000000..f74b44a Binary files /dev/null and b/aarch64/share/hadoop/tools/sources/hadoop-distcp-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/tools/sources/hadoop-distcp-2.2.0-test-sources.jar b/aarch64/share/hadoop/tools/sources/hadoop-distcp-2.2.0-test-sources.jar new file mode 100644 index 0000000..7313ba5 Binary files /dev/null and b/aarch64/share/hadoop/tools/sources/hadoop-distcp-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/tools/sources/hadoop-extras-2.2.0-sources.jar b/aarch64/share/hadoop/tools/sources/hadoop-extras-2.2.0-sources.jar new file mode 100644 index 0000000..114bd7c Binary files /dev/null and b/aarch64/share/hadoop/tools/sources/hadoop-extras-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/tools/sources/hadoop-extras-2.2.0-test-sources.jar b/aarch64/share/hadoop/tools/sources/hadoop-extras-2.2.0-test-sources.jar new file mode 
100644 index 0000000..8c0987d Binary files /dev/null and b/aarch64/share/hadoop/tools/sources/hadoop-extras-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/tools/sources/hadoop-gridmix-2.2.0-sources.jar b/aarch64/share/hadoop/tools/sources/hadoop-gridmix-2.2.0-sources.jar new file mode 100644 index 0000000..01e7cf0 Binary files /dev/null and b/aarch64/share/hadoop/tools/sources/hadoop-gridmix-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/tools/sources/hadoop-gridmix-2.2.0-test-sources.jar b/aarch64/share/hadoop/tools/sources/hadoop-gridmix-2.2.0-test-sources.jar new file mode 100644 index 0000000..ffb7a68 Binary files /dev/null and b/aarch64/share/hadoop/tools/sources/hadoop-gridmix-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/tools/sources/hadoop-rumen-2.2.0-sources.jar b/aarch64/share/hadoop/tools/sources/hadoop-rumen-2.2.0-sources.jar new file mode 100644 index 0000000..ce65ab4 Binary files /dev/null and b/aarch64/share/hadoop/tools/sources/hadoop-rumen-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/tools/sources/hadoop-rumen-2.2.0-test-sources.jar b/aarch64/share/hadoop/tools/sources/hadoop-rumen-2.2.0-test-sources.jar new file mode 100644 index 0000000..0c40543 Binary files /dev/null and b/aarch64/share/hadoop/tools/sources/hadoop-rumen-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/tools/sources/hadoop-streaming-2.2.0-sources.jar b/aarch64/share/hadoop/tools/sources/hadoop-streaming-2.2.0-sources.jar new file mode 100644 index 0000000..c3e9925 Binary files /dev/null and b/aarch64/share/hadoop/tools/sources/hadoop-streaming-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/tools/sources/hadoop-streaming-2.2.0-test-sources.jar b/aarch64/share/hadoop/tools/sources/hadoop-streaming-2.2.0-test-sources.jar new file mode 100644 index 0000000..695d93b Binary files /dev/null and b/aarch64/share/hadoop/tools/sources/hadoop-streaming-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/hadoop-yarn-api-2.2.0.jar b/aarch64/share/hadoop/yarn/hadoop-yarn-api-2.2.0.jar new file mode 100644 index 0000000..1c5f204 Binary files /dev/null and b/aarch64/share/hadoop/yarn/hadoop-yarn-api-2.2.0.jar differ diff --git a/aarch64/share/hadoop/yarn/hadoop-yarn-applications-distributedshell-2.2.0.jar b/aarch64/share/hadoop/yarn/hadoop-yarn-applications-distributedshell-2.2.0.jar new file mode 100644 index 0000000..09595ed Binary files /dev/null and b/aarch64/share/hadoop/yarn/hadoop-yarn-applications-distributedshell-2.2.0.jar differ diff --git a/aarch64/share/hadoop/yarn/hadoop-yarn-applications-unmanaged-am-launcher-2.2.0.jar b/aarch64/share/hadoop/yarn/hadoop-yarn-applications-unmanaged-am-launcher-2.2.0.jar new file mode 100644 index 0000000..1b6aa76 Binary files /dev/null and b/aarch64/share/hadoop/yarn/hadoop-yarn-applications-unmanaged-am-launcher-2.2.0.jar differ diff --git a/aarch64/share/hadoop/yarn/hadoop-yarn-client-2.2.0.jar b/aarch64/share/hadoop/yarn/hadoop-yarn-client-2.2.0.jar new file mode 100644 index 0000000..d719dd4 Binary files /dev/null and b/aarch64/share/hadoop/yarn/hadoop-yarn-client-2.2.0.jar differ diff --git a/aarch64/share/hadoop/yarn/hadoop-yarn-common-2.2.0.jar b/aarch64/share/hadoop/yarn/hadoop-yarn-common-2.2.0.jar new file mode 100644 index 0000000..472cdf6 Binary files /dev/null and b/aarch64/share/hadoop/yarn/hadoop-yarn-common-2.2.0.jar differ diff --git a/aarch64/share/hadoop/yarn/hadoop-yarn-server-common-2.2.0.jar 
b/aarch64/share/hadoop/yarn/hadoop-yarn-server-common-2.2.0.jar new file mode 100644 index 0000000..b38c8a6 Binary files /dev/null and b/aarch64/share/hadoop/yarn/hadoop-yarn-server-common-2.2.0.jar differ diff --git a/aarch64/share/hadoop/yarn/hadoop-yarn-server-nodemanager-2.2.0.jar b/aarch64/share/hadoop/yarn/hadoop-yarn-server-nodemanager-2.2.0.jar new file mode 100644 index 0000000..d3bb4fa Binary files /dev/null and b/aarch64/share/hadoop/yarn/hadoop-yarn-server-nodemanager-2.2.0.jar differ diff --git a/aarch64/share/hadoop/yarn/hadoop-yarn-server-resourcemanager-2.2.0.jar b/aarch64/share/hadoop/yarn/hadoop-yarn-server-resourcemanager-2.2.0.jar new file mode 100644 index 0000000..99f7e0b Binary files /dev/null and b/aarch64/share/hadoop/yarn/hadoop-yarn-server-resourcemanager-2.2.0.jar differ diff --git a/aarch64/share/hadoop/yarn/hadoop-yarn-server-tests-2.2.0.jar b/aarch64/share/hadoop/yarn/hadoop-yarn-server-tests-2.2.0.jar new file mode 100644 index 0000000..d8b7aed Binary files /dev/null and b/aarch64/share/hadoop/yarn/hadoop-yarn-server-tests-2.2.0.jar differ diff --git a/aarch64/share/hadoop/yarn/hadoop-yarn-server-web-proxy-2.2.0.jar b/aarch64/share/hadoop/yarn/hadoop-yarn-server-web-proxy-2.2.0.jar new file mode 100644 index 0000000..7e617b5 Binary files /dev/null and b/aarch64/share/hadoop/yarn/hadoop-yarn-server-web-proxy-2.2.0.jar differ diff --git a/aarch64/share/hadoop/yarn/hadoop-yarn-site-2.2.0.jar b/aarch64/share/hadoop/yarn/hadoop-yarn-site-2.2.0.jar new file mode 100644 index 0000000..25383e3 Binary files /dev/null and b/aarch64/share/hadoop/yarn/hadoop-yarn-site-2.2.0.jar differ diff --git a/aarch64/share/hadoop/yarn/lib-examples/hsqldb-2.0.0.jar b/aarch64/share/hadoop/yarn/lib-examples/hsqldb-2.0.0.jar new file mode 100644 index 0000000..2da29ed Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib-examples/hsqldb-2.0.0.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/aopalliance-1.0.jar b/aarch64/share/hadoop/yarn/lib/aopalliance-1.0.jar new file mode 100644 index 0000000..578b1a0 Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/aopalliance-1.0.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/asm-3.2.jar b/aarch64/share/hadoop/yarn/lib/asm-3.2.jar new file mode 100644 index 0000000..ca9f8d2 Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/asm-3.2.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/avro-1.7.4.jar b/aarch64/share/hadoop/yarn/lib/avro-1.7.4.jar new file mode 100644 index 0000000..69dd87d Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/avro-1.7.4.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/commons-compress-1.4.1.jar b/aarch64/share/hadoop/yarn/lib/commons-compress-1.4.1.jar new file mode 100644 index 0000000..b58761e Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/commons-compress-1.4.1.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/commons-io-2.1.jar b/aarch64/share/hadoop/yarn/lib/commons-io-2.1.jar new file mode 100644 index 0000000..b5c7d69 Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/commons-io-2.1.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/guice-3.0.jar b/aarch64/share/hadoop/yarn/lib/guice-3.0.jar new file mode 100644 index 0000000..f313e2b Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/guice-3.0.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/guice-servlet-3.0.jar b/aarch64/share/hadoop/yarn/lib/guice-servlet-3.0.jar new file mode 100644 index 0000000..bdc6614 Binary files /dev/null and 
b/aarch64/share/hadoop/yarn/lib/guice-servlet-3.0.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/hadoop-annotations-2.2.0.jar b/aarch64/share/hadoop/yarn/lib/hadoop-annotations-2.2.0.jar new file mode 100644 index 0000000..c2d4dc1 Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/hadoop-annotations-2.2.0.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/hamcrest-core-1.1.jar b/aarch64/share/hadoop/yarn/lib/hamcrest-core-1.1.jar new file mode 100644 index 0000000..e5149be Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/hamcrest-core-1.1.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/jackson-core-asl-1.8.8.jar b/aarch64/share/hadoop/yarn/lib/jackson-core-asl-1.8.8.jar new file mode 100644 index 0000000..05f3353 Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/jackson-core-asl-1.8.8.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/jackson-mapper-asl-1.8.8.jar b/aarch64/share/hadoop/yarn/lib/jackson-mapper-asl-1.8.8.jar new file mode 100644 index 0000000..7c7cd21 Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/jackson-mapper-asl-1.8.8.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/javax.inject-1.jar b/aarch64/share/hadoop/yarn/lib/javax.inject-1.jar new file mode 100644 index 0000000..b2a9d0b Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/javax.inject-1.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/jersey-core-1.9.jar b/aarch64/share/hadoop/yarn/lib/jersey-core-1.9.jar new file mode 100644 index 0000000..548dd88 Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/jersey-core-1.9.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/jersey-guice-1.9.jar b/aarch64/share/hadoop/yarn/lib/jersey-guice-1.9.jar new file mode 100644 index 0000000..cb46c94 Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/jersey-guice-1.9.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/jersey-server-1.9.jar b/aarch64/share/hadoop/yarn/lib/jersey-server-1.9.jar new file mode 100644 index 0000000..ae0117c Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/jersey-server-1.9.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/junit-4.10.jar b/aarch64/share/hadoop/yarn/lib/junit-4.10.jar new file mode 100644 index 0000000..954851e Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/junit-4.10.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/log4j-1.2.17.jar b/aarch64/share/hadoop/yarn/lib/log4j-1.2.17.jar new file mode 100644 index 0000000..1d425cf Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/log4j-1.2.17.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/netty-3.6.2.Final.jar b/aarch64/share/hadoop/yarn/lib/netty-3.6.2.Final.jar new file mode 100644 index 0000000..a421e28 Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/netty-3.6.2.Final.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/paranamer-2.3.jar b/aarch64/share/hadoop/yarn/lib/paranamer-2.3.jar new file mode 100644 index 0000000..ad12ae9 Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/paranamer-2.3.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/protobuf-java-2.5.0.jar b/aarch64/share/hadoop/yarn/lib/protobuf-java-2.5.0.jar new file mode 100644 index 0000000..4c4e686 Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/protobuf-java-2.5.0.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/snappy-java-1.0.4.1.jar b/aarch64/share/hadoop/yarn/lib/snappy-java-1.0.4.1.jar new file mode 100644 index 0000000..8198919 Binary files /dev/null and 
b/aarch64/share/hadoop/yarn/lib/snappy-java-1.0.4.1.jar differ diff --git a/aarch64/share/hadoop/yarn/lib/xz-1.0.jar b/aarch64/share/hadoop/yarn/lib/xz-1.0.jar new file mode 100644 index 0000000..a848f16 Binary files /dev/null and b/aarch64/share/hadoop/yarn/lib/xz-1.0.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-api-2.2.0-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-api-2.2.0-sources.jar new file mode 100644 index 0000000..aca2f36 Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-api-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-distributedshell-2.2.0-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-distributedshell-2.2.0-sources.jar new file mode 100644 index 0000000..e4d27af Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-distributedshell-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-distributedshell-2.2.0-test-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-distributedshell-2.2.0-test-sources.jar new file mode 100644 index 0000000..6c3c8d6 Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-distributedshell-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-unmanaged-am-launcher-2.2.0-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-unmanaged-am-launcher-2.2.0-sources.jar new file mode 100644 index 0000000..6febcd2 Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-unmanaged-am-launcher-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-unmanaged-am-launcher-2.2.0-test-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-unmanaged-am-launcher-2.2.0-test-sources.jar new file mode 100644 index 0000000..41ca395 Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-applications-unmanaged-am-launcher-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-client-2.2.0-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-client-2.2.0-sources.jar new file mode 100644 index 0000000..72956dd Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-client-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-client-2.2.0-test-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-client-2.2.0-test-sources.jar new file mode 100644 index 0000000..04cf7d0 Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-client-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-common-2.2.0-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-common-2.2.0-sources.jar new file mode 100644 index 0000000..6d6e3f0 Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-common-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-common-2.2.0-test-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-common-2.2.0-test-sources.jar new file mode 100644 index 0000000..62c9936 Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-common-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-common-2.2.0-sources.jar 
b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-common-2.2.0-sources.jar new file mode 100644 index 0000000..545b1db Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-common-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-common-2.2.0-test-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-common-2.2.0-test-sources.jar new file mode 100644 index 0000000..6cb33d5 Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-common-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-nodemanager-2.2.0-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-nodemanager-2.2.0-sources.jar new file mode 100644 index 0000000..8724909 Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-nodemanager-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-nodemanager-2.2.0-test-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-nodemanager-2.2.0-test-sources.jar new file mode 100644 index 0000000..06d373c Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-nodemanager-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-resourcemanager-2.2.0-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-resourcemanager-2.2.0-sources.jar new file mode 100644 index 0000000..ddede34 Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-resourcemanager-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-resourcemanager-2.2.0-test-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-resourcemanager-2.2.0-test-sources.jar new file mode 100644 index 0000000..4408951 Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-resourcemanager-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-tests-2.2.0-test-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-tests-2.2.0-test-sources.jar new file mode 100644 index 0000000..94cf1f8 Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-tests-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-web-proxy-2.2.0-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-web-proxy-2.2.0-sources.jar new file mode 100644 index 0000000..d59041c Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-web-proxy-2.2.0-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-web-proxy-2.2.0-test-sources.jar b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-web-proxy-2.2.0-test-sources.jar new file mode 100644 index 0000000..79d754f Binary files /dev/null and b/aarch64/share/hadoop/yarn/sources/hadoop-yarn-server-web-proxy-2.2.0-test-sources.jar differ diff --git a/aarch64/share/hadoop/yarn/test/hadoop-yarn-server-tests-2.2.0-tests.jar b/aarch64/share/hadoop/yarn/test/hadoop-yarn-server-tests-2.2.0-tests.jar new file mode 100644 index 0000000..c766af5 Binary files /dev/null and b/aarch64/share/hadoop/yarn/test/hadoop-yarn-server-tests-2.2.0-tests.jar differ -- cgit v1.2.3