Merge remote branch 'origin/master' into new-rdds

Matei Zaharia 2011-03-01 10:33:37 -08:00
Parents 9e59afd710 021c50a8d4
Commit a789e9aaea
156 changed files: 100466 additions and 3318 deletions

Binary data
core/lib/hadoop-0.20.0/.DS_Store vendored

Binary file not shown.

View File

@@ -1,258 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef HADOOP_PIPES_HH
#define HADOOP_PIPES_HH
#ifdef SWIG
%module (directors="1") HadoopPipes
%include "std_string.i"
%feature("director") Mapper;
%feature("director") Reducer;
%feature("director") Partitioner;
%feature("director") RecordReader;
%feature("director") RecordWriter;
%feature("director") Factory;
#else
#include <string>
#endif
namespace HadoopPipes {
/**
* This interface defines the interface between application code and the
* foreign code interface to Hadoop Map/Reduce.
*/
/**
* A JobConf defines the properties for a job.
*/
class JobConf {
public:
virtual bool hasKey(const std::string& key) const = 0;
virtual const std::string& get(const std::string& key) const = 0;
virtual int getInt(const std::string& key) const = 0;
virtual float getFloat(const std::string& key) const = 0;
virtual bool getBoolean(const std::string&key) const = 0;
virtual ~JobConf() {}
};
/**
* Task context provides the information about the task and job.
*/
class TaskContext {
public:
/**
* Counter to keep track of a property and its value.
*/
class Counter {
private:
int id;
public:
Counter(int counterId) : id(counterId) {}
Counter(const Counter& counter) : id(counter.id) {}
int getId() const { return id; }
};
/**
* Get the JobConf for the current task.
*/
virtual const JobConf* getJobConf() = 0;
/**
* Get the current key.
* @return the current key
*/
virtual const std::string& getInputKey() = 0;
/**
* Get the current value.
* @return the current value
*/
virtual const std::string& getInputValue() = 0;
/**
* Generate an output record
*/
virtual void emit(const std::string& key, const std::string& value) = 0;
/**
* Mark your task as having made progress without changing the status
* message.
*/
virtual void progress() = 0;
/**
* Set the status message and call progress.
*/
virtual void setStatus(const std::string& status) = 0;
/**
* Register a counter with the given group and name.
*/
virtual Counter*
getCounter(const std::string& group, const std::string& name) = 0;
/**
* Increment the value of the counter with the given amount.
*/
virtual void incrementCounter(const Counter* counter, uint64_t amount) = 0;
virtual ~TaskContext() {}
};
class MapContext: public TaskContext {
public:
/**
* Access the InputSplit of the mapper.
*/
virtual const std::string& getInputSplit() = 0;
/**
* Get the name of the key class of the input to this task.
*/
virtual const std::string& getInputKeyClass() = 0;
/**
* Get the name of the value class of the input to this task.
*/
virtual const std::string& getInputValueClass() = 0;
};
class ReduceContext: public TaskContext {
public:
/**
* Advance to the next value.
*/
virtual bool nextValue() = 0;
};
class Closable {
public:
virtual void close() {}
virtual ~Closable() {}
};
/**
* The application's mapper class to do map.
*/
class Mapper: public Closable {
public:
virtual void map(MapContext& context) = 0;
};
/**
* The application's reducer class to do reduce.
*/
class Reducer: public Closable {
public:
virtual void reduce(ReduceContext& context) = 0;
};
/**
* User code to decide where each key should be sent.
*/
class Partitioner {
public:
virtual int partition(const std::string& key, int numOfReduces) = 0;
virtual ~Partitioner() {}
};
/**
* For applications that want to read the input directly for the map function
* they can define RecordReaders in C++.
*/
class RecordReader: public Closable {
public:
virtual bool next(std::string& key, std::string& value) = 0;
/**
* The progress of the record reader through the split as a value between
* 0.0 and 1.0.
*/
virtual float getProgress() = 0;
};
/**
* An object to write key/value pairs as they are emitted from the reduce.
*/
class RecordWriter: public Closable {
public:
virtual void emit(const std::string& key,
const std::string& value) = 0;
};
/**
* A factory to create the necessary application objects.
*/
class Factory {
public:
virtual Mapper* createMapper(MapContext& context) const = 0;
virtual Reducer* createReducer(ReduceContext& context) const = 0;
/**
* Create a combiner, if this application has one.
* @return the new combiner or NULL, if one is not needed
*/
virtual Reducer* createCombiner(MapContext& context) const {
return NULL;
}
/**
* Create an application partitioner object.
* @return the new partitioner or NULL, if the default partitioner should be
* used.
*/
virtual Partitioner* createPartitioner(MapContext& context) const {
return NULL;
}
/**
* Create an application record reader.
* @return the new RecordReader or NULL, if the Java RecordReader should be
* used.
*/
virtual RecordReader* createRecordReader(MapContext& context) const {
return NULL;
}
/**
* Create an application record writer.
* @return the new RecordWriter or NULL, if the Java RecordWriter should be
* used.
*/
virtual RecordWriter* createRecordWriter(ReduceContext& context) const {
return NULL;
}
virtual ~Factory() {}
};
/**
* Run the assigned task in the framework.
* The user's main function should set the various functions using the
* set* functions above and then call this.
* @return true, if the task succeeded.
*/
bool runTask(const Factory& factory);
}
#endif
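
For orientation, a minimal word-count sketch (not part of this diff) of how an application uses the Pipes API declared above: implement Mapper and Reducer, then hand a Factory to runTask(). Class names are illustrative, the hadoop/ include paths assume the headers' usual install location, and TemplateFactory plus the HadoopUtils helpers are declared in the headers further down.

#include <string>
#include <vector>

#include "hadoop/Pipes.hh"            // the Pipes.hh header shown above
#include "hadoop/TemplateFactory.hh"  // declared further down in this diff
#include "hadoop/StringUtils.hh"      // splitString/toInt/toString, also below

class WordCountMapper : public HadoopPipes::Mapper {
public:
  WordCountMapper(HadoopPipes::MapContext&) {}
  void map(HadoopPipes::MapContext& context) {
    // Emit each word of the input line with a count of 1.
    std::vector<std::string> words =
        HadoopUtils::splitString(context.getInputValue(), " ");
    for (size_t i = 0; i < words.size(); ++i) {
      context.emit(words[i], "1");
    }
  }
};

class WordCountReducer : public HadoopPipes::Reducer {
public:
  WordCountReducer(HadoopPipes::ReduceContext&) {}
  void reduce(HadoopPipes::ReduceContext& context) {
    // Sum the counts emitted for the current key.
    int sum = 0;
    while (context.nextValue()) {
      sum += HadoopUtils::toInt(context.getInputValue());
    }
    context.emit(context.getInputKey(), HadoopUtils::toString(sum));
  }
};

int main(int argc, char* argv[]) {
  // TemplateFactory wires the user classes into the abstract Factory interface.
  return HadoopPipes::runTask(
      HadoopPipes::TemplateFactory<WordCountMapper, WordCountReducer>());
}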

View File

@@ -1,169 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef HADOOP_SERIAL_UTILS_HH
#define HADOOP_SERIAL_UTILS_HH
#include <string>
namespace HadoopUtils {
/**
* A simple exception class that records a message for the user.
*/
class Error {
private:
std::string error;
public:
/**
* Create an error object with the given message.
*/
Error(const std::string& msg);
/**
* Construct an error object with the given message that was created on
* the given file, line, and function.
*/
Error(const std::string& msg,
const std::string& file, int line, const std::string& function);
/**
* Get the error message.
*/
const std::string& getMessage() const;
};
/**
* Check to make sure that the condition is true, and throw an exception
* if it is not. The exception will contain the message and a description
* of the source location.
*/
#define HADOOP_ASSERT(CONDITION, MESSAGE) \
{ \
if (!(CONDITION)) { \
throw HadoopUtils::Error((MESSAGE), __FILE__, __LINE__, \
__PRETTY_FUNCTION__); \
} \
}
/**
* An interface for an input stream.
*/
class InStream {
public:
/**
* Reads len bytes from the stream into the buffer.
* @param buf the buffer to read into
* @param buflen the length of the buffer
* @throws Error if there are problems reading
*/
virtual void read(void *buf, size_t len) = 0;
virtual ~InStream() {}
};
/**
* An interface for an output stream.
*/
class OutStream {
public:
/**
* Write the given buffer to the stream.
* @param buf the data to write
* @param len the number of bytes to write
* @throws Error if there are problems writing
*/
virtual void write(const void *buf, size_t len) = 0;
/**
* Flush the data to the underlying store.
*/
virtual void flush() = 0;
virtual ~OutStream() {}
};
/**
* A class to read a file as a stream.
*/
class FileInStream : public InStream {
public:
FileInStream();
bool open(const std::string& name);
bool open(FILE* file);
void read(void *buf, size_t buflen);
bool skip(size_t nbytes);
bool close();
virtual ~FileInStream();
private:
/**
* The file to read from.
*/
FILE *mFile;
/**
* Is this class responsible for closing the FILE*?
*/
bool isOwned;
};
/**
* A class to write a stream to a file.
*/
class FileOutStream: public OutStream {
public:
/**
* Create a stream that isn't bound to anything.
*/
FileOutStream();
/**
* Create the given file, potentially overwriting an existing file.
*/
bool open(const std::string& name, bool overwrite);
bool open(FILE* file);
void write(const void* buf, size_t len);
bool advance(size_t nbytes);
void flush();
bool close();
virtual ~FileOutStream();
private:
FILE *mFile;
bool isOwned;
};
/**
* A stream that reads from a string.
*/
class StringInStream: public InStream {
public:
StringInStream(const std::string& str);
virtual void read(void *buf, size_t buflen);
private:
const std::string& buffer;
std::string::const_iterator itr;
};
void serializeInt(int32_t t, OutStream& stream);
int32_t deserializeInt(InStream& stream);
void serializeLong(int64_t t, OutStream& stream);
int64_t deserializeLong(InStream& stream);
void serializeFloat(float t, OutStream& stream);
float deserializeFloat(InStream& stream);
void serializeString(const std::string& t, OutStream& stream);
void deserializeString(std::string& t, InStream& stream);
}
#endif
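
A quick sketch (not part of this diff; include path assumed) of the serialization helpers declared above: write an int and a string with serializeInt/serializeString, then read them back in the same order.

#include <stdint.h>
#include <iostream>
#include <string>

#include "hadoop/SerialUtils.hh"  // the header shown above

int main() {
  // Serialize an int and a string to a file...
  HadoopUtils::FileOutStream out;
  out.open("record.bin", /*overwrite=*/true);
  HadoopUtils::serializeInt(42, out);
  HadoopUtils::serializeString("hello", out);
  out.close();

  // ...then deserialize them in the same order.
  HadoopUtils::FileInStream in;
  in.open("record.bin");
  int32_t n = HadoopUtils::deserializeInt(in);
  std::string s;
  HadoopUtils::deserializeString(s, in);
  in.close();

  std::cout << n << " " << s << std::endl;  // prints: 42 hello
  return 0;
}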

View File

@@ -1,81 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef HADOOP_STRING_UTILS_HH
#define HADOOP_STRING_UTILS_HH
#include <stdint.h>
#include <string>
#include <vector>
namespace HadoopUtils {
/**
* Convert an integer to a string.
*/
std::string toString(int32_t x);
/**
* Convert a string to an integer.
* @throws Error if the string is not a valid integer
*/
int32_t toInt(const std::string& val);
/**
* Convert the string to a float.
* @throws Error if the string is not a valid float
*/
float toFloat(const std::string& val);
/**
* Convert the string to a boolean.
* @throws Error if the string is not a valid boolean value
*/
bool toBool(const std::string& val);
/**
* Get the current time in the number of milliseconds since 1970.
*/
uint64_t getCurrentMillis();
/**
* Split a string into "words". Multiple delimiters are treated as a single
* word break, so no zero-length words are returned.
* @param str the string to split
* @param separator a list of characters that divide words
*/
std::vector<std::string> splitString(const std::string& str,
const char* separator);
/**
* Quote a string to avoid "\", non-printable characters, and the
* deliminators.
* @param str the string to quote
* @param deliminators the set of characters to always quote
*/
std::string quoteString(const std::string& str,
const char* deliminators);
/**
* Unquote the given string to return the original string.
* @param str the string to unquote
*/
std::string unquoteString(const std::string& str);
}
#endif
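
A short sketch (again illustrative, include path assumed) of the string helpers declared above: quote a tab-delimited record, restore it, and split it into fields.

#include <iostream>
#include <string>
#include <vector>

#include "hadoop/StringUtils.hh"  // the header shown above

int main() {
  std::string raw = "alice\tbob\tcarol";

  // quoteString escapes the delimiter; unquoteString restores the original.
  std::string quoted = HadoopUtils::quoteString(raw, "\t");
  std::string restored = HadoopUtils::unquoteString(quoted);

  // splitString breaks the restored record into its fields.
  std::vector<std::string> fields = HadoopUtils::splitString(restored, "\t");
  for (size_t i = 0; i < fields.size(); ++i) {
    std::cout << fields[i] << std::endl;
  }

  std::cout << "now: " << HadoopUtils::getCurrentMillis()
            << " ms since 1970" << std::endl;
  return 0;
}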

View File

@@ -1,96 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef HADOOP_PIPES_TEMPLATE_FACTORY_HH
#define HADOOP_PIPES_TEMPLATE_FACTORY_HH
namespace HadoopPipes {
template <class mapper, class reducer>
class TemplateFactory2: public Factory {
public:
Mapper* createMapper(MapContext& context) const {
return new mapper(context);
}
Reducer* createReducer(ReduceContext& context) const {
return new reducer(context);
}
};
template <class mapper, class reducer, class partitioner>
class TemplateFactory3: public TemplateFactory2<mapper,reducer> {
public:
Partitioner* createPartitioner(MapContext& context) const {
return new partitioner(context);
}
};
template <class mapper, class reducer>
class TemplateFactory3<mapper, reducer, void>
: public TemplateFactory2<mapper,reducer> {
};
template <class mapper, class reducer, class partitioner, class combiner>
class TemplateFactory4
: public TemplateFactory3<mapper,reducer,partitioner>{
public:
Reducer* createCombiner(MapContext& context) const {
return new combiner(context);
}
};
template <class mapper, class reducer, class partitioner>
class TemplateFactory4<mapper,reducer,partitioner,void>
: public TemplateFactory3<mapper,reducer,partitioner>{
};
template <class mapper, class reducer, class partitioner,
class combiner, class recordReader>
class TemplateFactory5
: public TemplateFactory4<mapper,reducer,partitioner,combiner>{
public:
RecordReader* createRecordReader(MapContext& context) const {
return new recordReader(context);
}
};
template <class mapper, class reducer, class partitioner,class combiner>
class TemplateFactory5<mapper,reducer,partitioner,combiner,void>
: public TemplateFactory4<mapper,reducer,partitioner,combiner>{
};
template <class mapper, class reducer, class partitioner=void,
class combiner=void, class recordReader=void,
class recordWriter=void>
class TemplateFactory
: public TemplateFactory5<mapper,reducer,partitioner,combiner,recordReader>{
public:
RecordWriter* createRecordWriter(ReduceContext& context) const {
return new recordWriter(context);
}
};
template <class mapper, class reducer, class partitioner,
class combiner, class recordReader>
class TemplateFactory<mapper, reducer, partitioner, combiner, recordReader,
void>
: public TemplateFactory5<mapper,reducer,partitioner,combiner,recordReader>{
};
}
#endif
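
The TemplateFactory2..TemplateFactory5 cascade above lets trailing void parameters fall back to the Factory defaults, i.e. the Java-side implementations. A hedged sketch (illustrative class names) that supplies a combiner while keeping the default partitioner, record reader, and record writer:

#include "hadoop/Pipes.hh"
#include "hadoop/TemplateFactory.hh"  // the header shown above

// Minimal stand-in classes; a real job would do useful work in map()/reduce().
class MyMapper : public HadoopPipes::Mapper {
public:
  MyMapper(HadoopPipes::MapContext&) {}
  void map(HadoopPipes::MapContext& ctx) {
    ctx.emit(ctx.getInputKey(), ctx.getInputValue());  // identity map
  }
};

class MyCombiner : public HadoopPipes::Reducer {
public:
  MyCombiner(HadoopPipes::MapContext&) {}  // combiners are built from the MapContext
  void reduce(HadoopPipes::ReduceContext& ctx) { /* pre-aggregate map output */ }
};

class MyReducer : public HadoopPipes::Reducer {
public:
  MyReducer(HadoopPipes::ReduceContext&) {}
  void reduce(HadoopPipes::ReduceContext& ctx) { /* aggregate per-key values */ }
};

int main(int argc, char* argv[]) {
  // The partitioner slot is left as void (keep the default), the combiner is
  // MyCombiner; recordReader/recordWriter also default to void, so the Java
  // implementations are used for those.
  return HadoopPipes::runTask(
      HadoopPipes::TemplateFactory<MyMapper, MyReducer, void, MyCombiner>());
}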

Binary file not shown.

Binary file not shown.

View File

@@ -1,258 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef HADOOP_PIPES_HH
#define HADOOP_PIPES_HH
#ifdef SWIG
%module (directors="1") HadoopPipes
%include "std_string.i"
%feature("director") Mapper;
%feature("director") Reducer;
%feature("director") Partitioner;
%feature("director") RecordReader;
%feature("director") RecordWriter;
%feature("director") Factory;
#else
#include <string>
#endif
namespace HadoopPipes {
/**
* This interface defines the interface between application code and the
* foreign code interface to Hadoop Map/Reduce.
*/
/**
* A JobConf defines the properties for a job.
*/
class JobConf {
public:
virtual bool hasKey(const std::string& key) const = 0;
virtual const std::string& get(const std::string& key) const = 0;
virtual int getInt(const std::string& key) const = 0;
virtual float getFloat(const std::string& key) const = 0;
virtual bool getBoolean(const std::string&key) const = 0;
virtual ~JobConf() {}
};
/**
* Task context provides the information about the task and job.
*/
class TaskContext {
public:
/**
* Counter to keep track of a property and its value.
*/
class Counter {
private:
int id;
public:
Counter(int counterId) : id(counterId) {}
Counter(const Counter& counter) : id(counter.id) {}
int getId() const { return id; }
};
/**
* Get the JobConf for the current task.
*/
virtual const JobConf* getJobConf() = 0;
/**
* Get the current key.
* @return the current key
*/
virtual const std::string& getInputKey() = 0;
/**
* Get the current value.
* @return the current value
*/
virtual const std::string& getInputValue() = 0;
/**
* Generate an output record
*/
virtual void emit(const std::string& key, const std::string& value) = 0;
/**
* Mark your task as having made progress without changing the status
* message.
*/
virtual void progress() = 0;
/**
* Set the status message and call progress.
*/
virtual void setStatus(const std::string& status) = 0;
/**
* Register a counter with the given group and name.
*/
virtual Counter*
getCounter(const std::string& group, const std::string& name) = 0;
/**
* Increment the value of the counter with the given amount.
*/
virtual void incrementCounter(const Counter* counter, uint64_t amount) = 0;
virtual ~TaskContext() {}
};
class MapContext: public TaskContext {
public:
/**
* Access the InputSplit of the mapper.
*/
virtual const std::string& getInputSplit() = 0;
/**
* Get the name of the key class of the input to this task.
*/
virtual const std::string& getInputKeyClass() = 0;
/**
* Get the name of the value class of the input to this task.
*/
virtual const std::string& getInputValueClass() = 0;
};
class ReduceContext: public TaskContext {
public:
/**
* Advance to the next value.
*/
virtual bool nextValue() = 0;
};
class Closable {
public:
virtual void close() {}
virtual ~Closable() {}
};
/**
* The application's mapper class to do map.
*/
class Mapper: public Closable {
public:
virtual void map(MapContext& context) = 0;
};
/**
* The application's reducer class to do reduce.
*/
class Reducer: public Closable {
public:
virtual void reduce(ReduceContext& context) = 0;
};
/**
* User code to decide where each key should be sent.
*/
class Partitioner {
public:
virtual int partition(const std::string& key, int numOfReduces) = 0;
virtual ~Partitioner() {}
};
/**
* For applications that want to read the input directly for the map function
* they can define RecordReaders in C++.
*/
class RecordReader: public Closable {
public:
virtual bool next(std::string& key, std::string& value) = 0;
/**
* The progress of the record reader through the split as a value between
* 0.0 and 1.0.
*/
virtual float getProgress() = 0;
};
/**
* An object to write key/value pairs as they are emitted from the reduce.
*/
class RecordWriter: public Closable {
public:
virtual void emit(const std::string& key,
const std::string& value) = 0;
};
/**
* A factory to create the necessary application objects.
*/
class Factory {
public:
virtual Mapper* createMapper(MapContext& context) const = 0;
virtual Reducer* createReducer(ReduceContext& context) const = 0;
/**
* Create a combiner, if this application has one.
* @return the new combiner or NULL, if one is not needed
*/
virtual Reducer* createCombiner(MapContext& context) const {
return NULL;
}
/**
* Create an application partitioner object.
* @return the new partitioner or NULL, if the default partitioner should be
* used.
*/
virtual Partitioner* createPartitioner(MapContext& context) const {
return NULL;
}
/**
* Create an application record reader.
* @return the new RecordReader or NULL, if the Java RecordReader should be
* used.
*/
virtual RecordReader* createRecordReader(MapContext& context) const {
return NULL;
}
/**
* Create an application record writer.
* @return the new RecordWriter or NULL, if the Java RecordWriter should be
* used.
*/
virtual RecordWriter* createRecordWriter(ReduceContext& context) const {
return NULL;
}
virtual ~Factory() {}
};
/**
* Run the assigned task in the framework.
* The user's main function should set the various functions using the
* set* functions above and then call this.
* @return true, if the task succeeded.
*/
bool runTask(const Factory& factory);
}
#endif

View File

@@ -1,169 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef HADOOP_SERIAL_UTILS_HH
#define HADOOP_SERIAL_UTILS_HH
#include <string>
namespace HadoopUtils {
/**
* A simple exception class that records a message for the user.
*/
class Error {
private:
std::string error;
public:
/**
* Create an error object with the given message.
*/
Error(const std::string& msg);
/**
* Construct an error object with the given message that was created on
* the given file, line, and function.
*/
Error(const std::string& msg,
const std::string& file, int line, const std::string& function);
/**
* Get the error message.
*/
const std::string& getMessage() const;
};
/**
* Check to make sure that the condition is true, and throw an exception
* if it is not. The exception will contain the message and a description
* of the source location.
*/
#define HADOOP_ASSERT(CONDITION, MESSAGE) \
{ \
if (!(CONDITION)) { \
throw HadoopUtils::Error((MESSAGE), __FILE__, __LINE__, \
__PRETTY_FUNCTION__); \
} \
}
/**
* An interface for an input stream.
*/
class InStream {
public:
/**
* Reads len bytes from the stream into the buffer.
* @param buf the buffer to read into
* @param buflen the length of the buffer
* @throws Error if there are problems reading
*/
virtual void read(void *buf, size_t len) = 0;
virtual ~InStream() {}
};
/**
* An interface for an output stream.
*/
class OutStream {
public:
/**
* Write the given buffer to the stream.
* @param buf the data to write
* @param len the number of bytes to write
* @throws Error if there are problems writing
*/
virtual void write(const void *buf, size_t len) = 0;
/**
* Flush the data to the underlying store.
*/
virtual void flush() = 0;
virtual ~OutStream() {}
};
/**
* A class to read a file as a stream.
*/
class FileInStream : public InStream {
public:
FileInStream();
bool open(const std::string& name);
bool open(FILE* file);
void read(void *buf, size_t buflen);
bool skip(size_t nbytes);
bool close();
virtual ~FileInStream();
private:
/**
* The file to read from.
*/
FILE *mFile;
/**
* Is this class responsible for closing the FILE*?
*/
bool isOwned;
};
/**
* A class to write a stream to a file.
*/
class FileOutStream: public OutStream {
public:
/**
* Create a stream that isn't bound to anything.
*/
FileOutStream();
/**
* Create the given file, potentially overwriting an existing file.
*/
bool open(const std::string& name, bool overwrite);
bool open(FILE* file);
void write(const void* buf, size_t len);
bool advance(size_t nbytes);
void flush();
bool close();
virtual ~FileOutStream();
private:
FILE *mFile;
bool isOwned;
};
/**
* A stream that reads from a string.
*/
class StringInStream: public InStream {
public:
StringInStream(const std::string& str);
virtual void read(void *buf, size_t buflen);
private:
const std::string& buffer;
std::string::const_iterator itr;
};
void serializeInt(int32_t t, OutStream& stream);
int32_t deserializeInt(InStream& stream);
void serializeLong(int64_t t, OutStream& stream);
int64_t deserializeLong(InStream& stream);
void serializeFloat(float t, OutStream& stream);
float deserializeFloat(InStream& stream);
void serializeString(const std::string& t, OutStream& stream);
void deserializeString(std::string& t, InStream& stream);
}
#endif

View File

@@ -1,81 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef HADOOP_STRING_UTILS_HH
#define HADOOP_STRING_UTILS_HH
#include <stdint.h>
#include <string>
#include <vector>
namespace HadoopUtils {
/**
* Convert an integer to a string.
*/
std::string toString(int32_t x);
/**
* Convert a string to an integer.
* @throws Error if the string is not a valid integer
*/
int32_t toInt(const std::string& val);
/**
* Convert the string to a float.
* @throws Error if the string is not a valid float
*/
float toFloat(const std::string& val);
/**
* Convert the string to a boolean.
* @throws Error if the string is not a valid boolean value
*/
bool toBool(const std::string& val);
/**
* Get the current time in the number of milliseconds since 1970.
*/
uint64_t getCurrentMillis();
/**
* Split a string into "words". Multiple delimiters are treated as a single
* word break, so no zero-length words are returned.
* @param str the string to split
* @param separator a list of characters that divide words
*/
std::vector<std::string> splitString(const std::string& str,
const char* separator);
/**
* Quote a string to avoid "\", non-printable characters, and the
* deliminators.
* @param str the string to quote
* @param deliminators the set of characters to always quote
*/
std::string quoteString(const std::string& str,
const char* deliminators);
/**
* Unquote the given string to return the original string.
* @param str the string to unquote
*/
std::string unquoteString(const std::string& str);
}
#endif

View File

@@ -1,96 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef HADOOP_PIPES_TEMPLATE_FACTORY_HH
#define HADOOP_PIPES_TEMPLATE_FACTORY_HH
namespace HadoopPipes {
template <class mapper, class reducer>
class TemplateFactory2: public Factory {
public:
Mapper* createMapper(MapContext& context) const {
return new mapper(context);
}
Reducer* createReducer(ReduceContext& context) const {
return new reducer(context);
}
};
template <class mapper, class reducer, class partitioner>
class TemplateFactory3: public TemplateFactory2<mapper,reducer> {
public:
Partitioner* createPartitioner(MapContext& context) const {
return new partitioner(context);
}
};
template <class mapper, class reducer>
class TemplateFactory3<mapper, reducer, void>
: public TemplateFactory2<mapper,reducer> {
};
template <class mapper, class reducer, class partitioner, class combiner>
class TemplateFactory4
: public TemplateFactory3<mapper,reducer,partitioner>{
public:
Reducer* createCombiner(MapContext& context) const {
return new combiner(context);
}
};
template <class mapper, class reducer, class partitioner>
class TemplateFactory4<mapper,reducer,partitioner,void>
: public TemplateFactory3<mapper,reducer,partitioner>{
};
template <class mapper, class reducer, class partitioner,
class combiner, class recordReader>
class TemplateFactory5
: public TemplateFactory4<mapper,reducer,partitioner,combiner>{
public:
RecordReader* createRecordReader(MapContext& context) const {
return new recordReader(context);
}
};
template <class mapper, class reducer, class partitioner,class combiner>
class TemplateFactory5<mapper,reducer,partitioner,combiner,void>
: public TemplateFactory4<mapper,reducer,partitioner,combiner>{
};
template <class mapper, class reducer, class partitioner=void,
class combiner=void, class recordReader=void,
class recordWriter=void>
class TemplateFactory
: public TemplateFactory5<mapper,reducer,partitioner,combiner,recordReader>{
public:
RecordWriter* createRecordWriter(ReduceContext& context) const {
return new recordWriter(context);
}
};
template <class mapper, class reducer, class partitioner,
class combiner, class recordReader>
class TemplateFactory<mapper, reducer, partitioner, combiner, recordReader,
void>
: public TemplateFactory5<mapper,reducer,partitioner,combiner,recordReader>{
};
}
#endif

Binary file not shown.

Binary file not shown.

View File

@@ -1,41 +0,0 @@
# libhdfs.la - a libtool library file
# Generated by ltmain.sh (GNU libtool) 2.2
#
# Please DO NOT delete this file!
# It is necessary for linking the library.
# The name that we can dlopen(3).
dlname='libhdfs.so.0'
# Names of this library.
library_names='libhdfs.so.0.0.0 libhdfs.so.0 libhdfs.so'
# The name of the static archive.
old_library=''
# Linker flags that can not go in dependency_libs.
inherited_linker_flags=''
# Libraries that this one depends upon.
dependency_libs=' -L/home/hadoopqa/tools/java/latest1.6-32/jre/lib/i386/server -ljvm -ldl -lpthread'
# Names of additional weak libraries provided by this library
weak_library_names=''
# Version information for libhdfs.
current=0
age=0
revision=0
# Is this an already installed library?
installed=yes
# Should we warn about portability when linking against -modules?
shouldnotlink=no
# Files to dlopen/dlpreopen
dlopen=''
dlpreopen=''
# Directory that this library needs to be installed in:
libdir='/home/ndaley/hadoop/branch-0.20/build/c++/Linux-i386-32/lib'

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,30 +0,0 @@
HDFSPROXY is an HTTPS proxy server that exposes the same HSFTP interface as a
real cluster. It authenticates users via user certificates and enforces access
control based on configuration files.
Starting up an HDFSPROXY server is similar to starting up an HDFS cluster.
Simply run "hdfsproxy" shell command. The main configuration file is
hdfsproxy-default.xml, which should be on the classpath. hdfsproxy-env.sh
can be used to set up environmental variables. In particular, JAVA_HOME should
be set. Additional configuration files include user-certs.xml,
user-permissions.xml and ssl-server.xml, which are used to specify allowed user
certs, allowed directories/files, and ssl keystore information for the proxy,
respectively. The location of these files can be specified in
hdfsproxy-default.xml. Environmental variable HDFSPROXY_CONF_DIR can be used to
point to the directory where these configuration files are located. The
configuration files of the proxied HDFS cluster should also be available on the
classpath (hdfs-default.xml and hdfs-site.xml).
Mirroring those used in HDFS, a few shell scripts are provided to start and
stop a group of proxy servers. The hosts to run hdfsproxy on are specified in
hdfsproxy-hosts file, one host per line. All hdfsproxy servers are stateless
and run independently from each other. Simple load balancing can be set up by
mapping all hdfsproxy server IP addresses to a single hostname. Users should
use that hostname to access the proxy. If an IP address lookup for that
hostname returns more than one IP address, an HFTP/HSFTP client will randomly
pick one to use.
Command "hdfsproxy -reloadPermFiles" can be used to trigger reloading of
user-certs.xml and user-permissions.xml files on all proxy servers listed in
the hdfsproxy-hosts file. Similarly, "hdfsproxy -clearUgiCache" command can be
used to clear the UGI caches on all proxy servers.

View File

@@ -1,170 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The HdfsProxy command script
#
# Environment Variables
#
# JAVA_HOME The java implementation to use. Overrides JAVA_HOME.
#
# HDFSPROXY_CLASSPATH Extra Java CLASSPATH entries.
#
# HDFSPROXY_HEAPSIZE The maximum amount of heap to use, in MB.
# Default is 1000.
#
# HDFSPROXY_OPTS Extra Java runtime options.
#
# HDFSPROXY_NAMENODE_OPTS These options are added to HDFSPROXY_OPTS
# HDFSPROXY_CLIENT_OPTS when the respective command is run.
# HDFSPROXY_{COMMAND}_OPTS etc HDFSPROXY_JT_OPTS applies to JobTracker
# for e.g. HDFSPROXY_CLIENT_OPTS applies to
# more than one command (fs, dfs, fsck,
# dfsadmin etc)
#
# HDFSPROXY_CONF_DIR Alternate conf dir. Default is ${HDFSPROXY_HOME}/conf.
#
# HDFSPROXY_ROOT_LOGGER The root appender. Default is INFO,console
#
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hdfsproxy-config.sh
cygwin=false
case "`uname`" in
CYGWIN*) cygwin=true;;
esac
if [ -f "${HDFSPROXY_CONF_DIR}/hdfsproxy-env.sh" ]; then
. "${HDFSPROXY_CONF_DIR}/hdfsproxy-env.sh"
fi
# some Java parameters
if [ "$JAVA_HOME" != "" ]; then
#echo "run java in $JAVA_HOME"
JAVA_HOME=$JAVA_HOME
fi
if [ "$JAVA_HOME" = "" ]; then
echo "Error: JAVA_HOME is not set."
exit 1
fi
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx1000m
# check envvars which might override default args
if [ "$HDFSPROXY_HEAPSIZE" != "" ]; then
#echo "run with heapsize $HDFSPROXY_HEAPSIZE"
JAVA_HEAP_MAX="-Xmx""$HDFSPROXY_HEAPSIZE""m"
#echo $JAVA_HEAP_MAX
fi
# CLASSPATH initially contains $HDFSPROXY_CONF_DIR
CLASSPATH="${HDFSPROXY_CONF_DIR}"
CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
# for developers, add HdfsProxy classes to CLASSPATH
if [ -d "$HDFSPROXY_HOME/build/classes" ]; then
CLASSPATH=${CLASSPATH}:$HDFSPROXY_HOME/build/classes
fi
if [ -d "$HDFSPROXY_HOME/build/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HDFSPROXY_HOME/build
fi
if [ -d "$HDFSPROXY_HOME/build/test/classes" ]; then
CLASSPATH=${CLASSPATH}:$HDFSPROXY_HOME/build/test/classes
fi
# so that filenames w/ spaces are handled correctly in loops below
IFS=
# for releases, add hdfsproxy jar & webapps to CLASSPATH
if [ -d "$HDFSPROXY_HOME/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HDFSPROXY_HOME
fi
for f in $HDFSPROXY_HOME/hdfsproxy-*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
# add libs to CLASSPATH
if [ -d "$HDFSPROXY_HOME/lib" ]; then
for f in $HDFSPROXY_HOME/lib/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
fi
if [ -d "$HDFSPROXY_HOME/../../" ]; then
for f in $HDFSPROXY_HOME/../../*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
fi
if [ -d "$HDFSPROXY_HOME/../../lib" ]; then
for f in $HDFSPROXY_HOME/../../lib/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
fi
if [ -d "$HDFSPROXY_HOME/../../lib/jsp-2.1" ]; then
for f in $HDFSPROXY_HOME/../../lib/jsp-2.1/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
fi
# add user-specified CLASSPATH last
if [ "$HDFSPROXY_CLASSPATH" != "" ]; then
CLASSPATH=${CLASSPATH}:${HDFSPROXY_CLASSPATH}
fi
# default log directory & file
if [ "$HDFSPROXY_LOG_DIR" = "" ]; then
HDFSPROXY_LOG_DIR="$HDFSPROXY_HOME/logs"
fi
if [ "$HDFSPROXY_LOGFILE" = "" ]; then
HDFSPROXY_LOGFILE='hdfsproxy.log'
fi
# restore ordinary behaviour
unset IFS
# figure out which class to run
CLASS='org.apache.hadoop.hdfsproxy.HdfsProxy'
# cygwin path translation
if $cygwin; then
CLASSPATH=`cygpath -p -w "$CLASSPATH"`
HDFSPROXY_HOME=`cygpath -d "$HDFSPROXY_HOME"`
HDFSPROXY_LOG_DIR=`cygpath -d "$HDFSPROXY_LOG_DIR"`
fi
# cygwin path translation
if $cygwin; then
JAVA_LIBRARY_PATH=`cygpath -p "$JAVA_LIBRARY_PATH"`
fi
HDFSPROXY_OPTS="$HDFSPROXY_OPTS -Dhdfsproxy.log.dir=$HDFSPROXY_LOG_DIR"
HDFSPROXY_OPTS="$HDFSPROXY_OPTS -Dhdfsproxy.log.file=$HDFSPROXY_LOGFILE"
HDFSPROXY_OPTS="$HDFSPROXY_OPTS -Dhdfsproxy.home.dir=$HDFSPROXY_HOME"
HDFSPROXY_OPTS="$HDFSPROXY_OPTS -Dhdfsproxy.id.str=$HDFSPROXY_IDENT_STRING"
HDFSPROXY_OPTS="$HDFSPROXY_OPTS -Dhdfsproxy.root.logger=${HDFSPROXY_ROOT_LOGGER:-INFO,console}"
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
HDFSPROXY_OPTS="$HDFSPROXY_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
fi
# run it
exec "$JAVA" $JAVA_HEAP_MAX $HDFSPROXY_OPTS -classpath "$CLASSPATH" $CLASS "$@"

View File

@@ -1,67 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# included in all the hadoop scripts with source command
# should not be executable directly
# also should not be passed any arguments, since we need original $*
# resolve links - $0 may be a softlink
this="$0"
while [ -h "$this" ]; do
ls=`ls -ld "$this"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '.*/.*' > /dev/null; then
this="$link"
else
this=`dirname "$this"`/"$link"
fi
done
# convert relative path to absolute path
bin=`dirname "$this"`
script=`basename "$this"`
bin=`cd "$bin"; pwd`
this="$bin/$script"
# the root of the HdfsProxy installation
export HDFSPROXY_HOME=`dirname "$this"`/..
#check to see if the conf dir is given as an optional argument
if [ $# -gt 1 ]
then
if [ "--config" = "$1" ]
then
shift
confdir=$1
shift
HDFSPROXY_CONF_DIR=$confdir
fi
fi
# Allow alternate conf dir location.
HDFSPROXY_CONF_DIR="${HDFSPROXY_CONF_DIR:-$HDFSPROXY_HOME/conf}"
# check to see if a slaves file is specified as an optional argument
if [ $# -gt 1 ]
then
if [ "--hosts" = "$1" ]
then
shift
slavesfile=$1
shift
export HDFSPROXY_SLAVES="${HDFSPROXY_CONF_DIR}/$slavesfile"
fi
fi

View File

@@ -1,141 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Runs a HdfsProxy as a daemon.
#
# Environment Variables
#
# HDFSPROXY_CONF_DIR Alternate conf dir. Default is ${HDFSPROXY_HOME}/conf.
# HDFSPROXY_LOG_DIR Where log files are stored. PWD by default.
# HDFSPROXY_MASTER host:path where hdfsproxy code should be rsync'd from
# HDFSPROXY_PID_DIR Where the pid files are stored. /tmp by default.
# HDFSPROXY_IDENT_STRING A string representing this instance of hdfsproxy. $USER by default
# HDFSPROXY_NICENESS The scheduling priority for daemons. Defaults to 0.
##
usage="Usage: hdfsproxy-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] (start|stop) "
# if no args specified, show usage
if [ $# -le 1 ]; then
echo $usage
exit 1
fi
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hdfsproxy-config.sh
# get arguments
startStop=$1
shift
hdfsproxy_rotate_log ()
{
log=$1;
num=5;
if [ -n "$2" ]; then
num=$2
fi
if [ -f "$log" ]; then # rotate logs
while [ $num -gt 1 ]; do
prev=`expr $num - 1`
[ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
num=$prev
done
mv "$log" "$log.$num";
fi
}
if [ -f "${HDFSPROXY_CONF_DIR}/hdfsproxy-env.sh" ]; then
. "${HDFSPROXY_CONF_DIR}/hdfsproxy-env.sh"
fi
# get log directory
if [ "$HDFSPROXY_LOG_DIR" = "" ]; then
export HDFSPROXY_LOG_DIR="$HDFSPROXY_HOME/logs"
fi
mkdir -p "$HDFSPROXY_LOG_DIR"
if [ "$HDFSPROXY_PID_DIR" = "" ]; then
HDFSPROXY_PID_DIR=/tmp
fi
if [ "$HDFSPROXY_IDENT_STRING" = "" ]; then
export HDFSPROXY_IDENT_STRING="$USER"
fi
# some variables
export HDFSPROXY_LOGFILE=hdfsproxy-$HDFSPROXY_IDENT_STRING-$HOSTNAME.log
export HDFSPROXY_ROOT_LOGGER="INFO,DRFA"
log=$HDFSPROXY_LOG_DIR/hdfsproxy-$HDFSPROXY_IDENT_STRING-$HOSTNAME.out
pid=$HDFSPROXY_PID_DIR/hdfsproxy-$HDFSPROXY_IDENT_STRING.pid
# Set default scheduling priority
if [ "$HDFSPROXY_NICENESS" = "" ]; then
export HDFSPROXY_NICENESS=0
fi
case $startStop in
(start)
mkdir -p "$HDFSPROXY_PID_DIR"
if [ -f $pid ]; then
if kill -0 `cat $pid` > /dev/null 2>&1; then
echo hdfsproxy running as process `cat $pid`. Stop it first.
exit 1
fi
fi
if [ "$HDFSPROXY_MASTER" != "" ]; then
echo rsync from $HDFSPROXY_MASTER
rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HDFSPROXY_MASTER/ "$HDFSPROXY_HOME"
fi
hdfsproxy_rotate_log $log
echo starting hdfsproxy, logging to $log
cd "$HDFSPROXY_HOME"
nohup nice -n $HDFSPROXY_NICENESS "$HDFSPROXY_HOME"/bin/hdfsproxy --config $HDFSPROXY_CONF_DIR "$@" > "$log" 2>&1 < /dev/null &
echo $! > $pid
sleep 1; head "$log"
;;
(stop)
if [ -f $pid ]; then
if kill -0 `cat $pid` > /dev/null 2>&1; then
echo stopping hdfsproxy
kill `cat $pid`
else
echo no hdfsproxy to stop
fi
else
echo no hdfsproxy to stop
fi
;;
(*)
echo $usage
exit 1
;;
esac

View File

@@ -1,34 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run a HdfsProxy command on all slave hosts.
usage="Usage: hdfsproxy-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] "
# if no args specified, show usage
if [ $# -le 1 ]; then
echo $usage
exit 1
fi
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. $bin/hdfsproxy-config.sh
exec "$bin/hdfsproxy-slaves.sh" --config $HDFSPROXY_CONF_DIR cd "$HDFSPROXY_HOME" \; "$bin/hdfsproxy-daemon.sh" --config $HDFSPROXY_CONF_DIR "$@"

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run a shell command on all slave hosts.
#
# Environment Variables
#
# HDFSPROXY_SLAVES File naming remote hosts.
# Default is ${HDFSPROXY_CONF_DIR}/hdfsproxy-hosts.
# HDFSPROXY_CONF_DIR Alternate conf dir. Default is ${HDFSPROXY_HOME}/conf.
# HDFSPROXY_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
# HDFSPROXY_SSH_OPTS Options passed to ssh when running remote commands.
##
usage="Usage: hdfsproxy-slaves.sh [--config confdir] command..."
# if no args specified, show usage
if [ $# -le 0 ]; then
echo $usage
exit 1
fi
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hdfsproxy-config.sh
# If the slaves file is specified in the command line,
# then it takes precedence over the definition in
# hdfsproxy-env.sh. Save it here.
HOSTLIST=$HDFSPROXY_SLAVES
if [ -f "${HDFSPROXY_CONF_DIR}/hdfsproxy-env.sh" ]; then
. "${HDFSPROXY_CONF_DIR}/hdfsproxy-env.sh"
fi
if [ "$HOSTLIST" = "" ]; then
if [ "$HDFSPROXY_SLAVES" = "" ]; then
export HOSTLIST="${HDFSPROXY_CONF_DIR}/hdfsproxy-hosts"
else
export HOSTLIST="${HDFSPROXY_SLAVES}"
fi
fi
for slave in `cat "$HOSTLIST"`; do
ssh $HDFSPROXY_SSH_OPTS $slave $"${@// /\\ }" \
2>&1 | sed "s/^/$slave: /" &
if [ "$HDFSPROXY_SLAVE_SLEEP" != "" ]; then
sleep $HDFSPROXY_SLAVE_SLEEP
fi
done
wait

View File

@@ -1,37 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Start hdfsproxy daemons.
# Run this on master node.
usage="Usage: start-hdfsproxy.sh"
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hdfsproxy-config.sh
# get arguments
if [ $# -ge 1 ]; then
echo $usage
exit 1
fi
# start hdfsproxy daemons
# "$bin"/hdfsproxy-daemon.sh --config $HDFSPROXY_CONF_DIR start
"$bin"/hdfsproxy-daemons.sh --config $HDFSPROXY_CONF_DIR --hosts hdfsproxy-hosts start

View File

@@ -1,28 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Stop hdfsproxy daemons. Run this on master node.
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hdfsproxy-config.sh
# "$bin"/hdfsproxy-daemon.sh --config $HDFSPROXY_CONF_DIR stop
"$bin"/hdfsproxy-daemons.sh --config $HDFSPROXY_CONF_DIR --hosts hdfsproxy-hosts stop

View File

@@ -1,183 +0,0 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="hdfsproxy" default="jar">
<property name="hdfsproxyVersion" value="1.0"/>
<property name="final.name" value="${ant.project.name}-${hdfsproxyVersion}"/>
<property name="bin.dir" value="${basedir}/bin"/>
<property name="lib.dir" value="${basedir}/lib"/>
<property name="conf.dir" value="${basedir}/conf"/>
<property name="docs.dir" value="${basedir}/docs"/>
<import file="../build-contrib.xml"/>
<target name="jar" depends="compile" description="Create jar">
<echo>
Building the .jar files.
</echo>
<jar jarfile="${build.dir}/${final.name}.jar" basedir="${build.classes}" includes="org/apache/hadoop/hdfsproxy/**/*.class" >
<manifest>
<section name="org/apache/hadoop/hdfsproxy">
<attribute name="Implementation-Title" value="HdfsProxy"/>
<attribute name="Implementation-Version" value="${hdfsproxyVersion}"/>
<attribute name="Implementation-Vendor" value="Apache"/>
</section>
</manifest>
</jar>
</target>
<!-- ====================================================== -->
<!-- Macro definitions -->
<!-- ====================================================== -->
<macrodef name="macro_tar" description="Worker Macro for tar">
<attribute name="param.destfile"/>
<element name="param.listofitems"/>
<sequential>
<tar compression="gzip" longfile="gnu"
destfile="@{param.destfile}">
<param.listofitems/>
</tar>
</sequential>
</macrodef>
<!-- ================================================================== -->
<!-- D I S T R I B U T I O N -->
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
<target name="local-package" depends="jar" description="Package in local build directory">
<mkdir dir="${build.dir}/${final.name}"/>
<mkdir dir="${build.dir}/${final.name}/logs"/>
<copy todir="${build.dir}/${final.name}" includeEmptyDirs="false">
<fileset dir="${build.dir}">
<include name="*.jar" />
<include name="*.war" />
</fileset>
</copy>
<copy todir="${build.dir}/${final.name}/lib" includeEmptyDirs="false">
<fileset dir="${common.ivy.lib.dir}">
<include name="commons-logging-${commons-logging.version}"/>
<include name="commons-logging-api-${commons-logging-api.version}.jar"/>
<include name="junit-${junit.version}.jar"/>
<include name="log4j-${log4j.version}.jar"/>
<include name="slf4j-api-${slf4j-api.version}.jar"/>
<include name="slf4j-log4j${slf4j-log4j.version}.jar"/>
<include name="xmlenc-${xmlenc.version}.jar"/>
<include name="jetty-${jetty.version}.jar"/>
<include name="servlet-api-${servlet-api-2.5.version}.jar"/>
<include name="core-${core.vesion}"/>
</fileset>
<fileset dir="${hadoop.root}/lib/jsp-${jsp.version}">
<include name="jsp-${jsp.version}"/>
<include name="jsp-api-${jsp-api.vesion}"/>
</fileset>
</copy>
<copy todir="${build.dir}/${final.name}/lib" includeEmptyDirs="false">
<fileset dir="${hadoop.root}/build">
<include name="*-core.jar"/>
<include name="*-tools.jar"/>
</fileset>
</copy>
<copy todir="${build.dir}/${final.name}/bin">
<fileset dir="${bin.dir}"/>
</copy>
<copy todir="${build.dir}/${final.name}/conf">
<fileset dir="${conf.dir}"/>
</copy>
<copy todir="${build.dir}/${final.name}">
<fileset dir="${basedir}">
<include name="README" />
<include name="build.xml" />
<include name="*.txt" />
</fileset>
</copy>
<copy todir="${build.dir}/${final.name}/src" includeEmptyDirs="true">
<fileset dir="${src.dir}" excludes="**/*.template **/docs/build/**/*"/>
</copy>
<chmod perm="ugo+x" type="file" parallel="false">
<fileset dir="${build.dir}/${final.name}/bin"/>
</chmod>
</target>
<target name="package" depends="local-package" description="Build distribution">
<mkdir dir="${dist.dir}/contrib/${name}"/>
<copy todir="${dist.dir}/contrib/${name}">
<fileset dir="${build.dir}/${final.name}">
<exclude name="**/lib/**" />
<exclude name="**/src/**" />
</fileset>
</copy>
<chmod dir="${dist.dir}/contrib/${name}/bin" perm="a+x" includes="*"/>
</target>
<!-- ================================================================== -->
<!-- Make release tarball -->
<!-- ================================================================== -->
<target name="tar" depends="local-package" description="Make release tarball">
<macro_tar param.destfile="${build.dir}/${final.name}.tar.gz">
<param.listofitems>
<tarfileset dir="${build.dir}" mode="664">
<exclude name="${final.name}/bin/*" />
<include name="${final.name}/**" />
</tarfileset>
<tarfileset dir="${build.dir}" mode="755">
<include name="${final.name}/bin/*" />
</tarfileset>
</param.listofitems>
</macro_tar>
</target>
<target name="binary" depends="local-package" description="Make tarball without source and documentation">
<macro_tar param.destfile="${build.dir}/${final.name}-bin.tar.gz">
<param.listofitems>
<tarfileset dir="${build.dir}" mode="664">
<exclude name="${final.name}/bin/*" />
<exclude name="${final.name}/src/**" />
<exclude name="${final.name}/docs/**" />
<include name="${final.name}/**" />
</tarfileset>
<tarfileset dir="${build.dir}" mode="755">
<include name="${final.name}/bin/*" />
</tarfileset>
</param.listofitems>
</macro_tar>
</target>
<!-- the unit test classpath -->
<path id="test.classpath">
<pathelement location="${build.test}" />
<pathelement location="${hadoop.root}/build/test/classes"/>
<pathelement location="${hadoop.root}/src/contrib/test"/>
<pathelement location="${hadoop.root}/conf"/>
<pathelement location="${hadoop.root}/build"/>
<pathelement location="${hadoop.root}/build/classes"/>
<pathelement location="${hadoop.root}/build/tools"/>
<pathelement location="${build.examples}"/>
<path refid="contrib-classpath"/>
</path>
</project>

View File

@@ -1,24 +0,0 @@
<?xml version="1.0"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="html"/>
<xsl:template match="configuration">
<html>
<body>
<table border="1">
<tr>
<td>name</td>
<td>value</td>
<td>description</td>
</tr>
<xsl:for-each select="property">
<tr>
<td><a name="{name}"><xsl:value-of select="name"/></a></td>
<td><xsl:value-of select="value"/></td>
<td><xsl:value-of select="description"/></td>
</tr>
</xsl:for-each>
</table>
</body>
</html>
</xsl:template>
</xsl:stylesheet>

@ -1,59 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put hdfsproxy specific properties in this file. -->
<configuration>
<property>
<name>hdfsproxy.https.address</name>
<value>0.0.0.0:50479</value>
<description>the SSL port that hdfsproxy listens on
</description>
</property>
<property>
<name>hdfsproxy.hosts</name>
<value>hdfsproxy-hosts</value>
<description>location of hdfsproxy-hosts file
</description>
</property>
<property>
<name>hdfsproxy.dfs.namenode.address</name>
<value></value>
<description>namenode address of the HDFS cluster being proxied
</description>
</property>
<property>
<name>hdfsproxy.https.server.keystore.resource</name>
<value>ssl-server.xml</value>
<description>location of the resource from which ssl server keystore
information will be extracted
</description>
</property>
<property>
<name>hdfsproxy.user.permissions.file.location</name>
<value>user-permissions.xml</value>
<description>location of the user permissions file
</description>
</property>
<property>
<name>hdfsproxy.user.certs.file.location</name>
<value>user-certs.xml</value>
<description>location of the user certs file
</description>
</property>
<property>
<name>hdfsproxy.ugi.cache.ugi.lifetime</name>
<value>15</value>
<description> The lifetime (in minutes) of a cached ugi
</description>
</property>
</configuration>
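
For context, these keys follow the standard Hadoop configuration pattern and can be read through org.apache.hadoop.conf.Configuration. The snippet below is only an illustrative sketch (it assumes the XML file is on the classpath; it is not hdfsproxy's actual startup code):

import org.apache.hadoop.conf.Configuration;

public class HdfsProxyConfigSketch {
  public static void main(String[] args) {
    // Load only the proxy settings shown above; the resource must be on the classpath.
    Configuration conf = new Configuration(false);
    conf.addResource("hdfsproxy-default.xml");

    String httpsAddress = conf.get("hdfsproxy.https.address", "0.0.0.0:50479");
    String hostsFile = conf.get("hdfsproxy.hosts", "hdfsproxy-hosts");
    int ugiLifetimeMinutes = conf.getInt("hdfsproxy.ugi.cache.ugi.lifetime", 15);

    System.out.println("listen on " + httpsAddress + ", hosts file " + hostsFile
        + ", ugi cache lifetime " + ugiLifetimeMinutes + " min");
  }
}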

@ -1,44 +0,0 @@
# Set HdfsProxy-specific environment variables here.
# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
# The java implementation to use. Required.
# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
# Extra Java CLASSPATH elements. Optional.
# export HDFSPROXY_CLASSPATH=
# The maximum amount of heap to use, in MB. Default is 1000.
# export HDFSPROXY_HEAPSIZE=2000
# Extra Java runtime options. Empty by default.
# export HDFSPROXY_OPTS=
# Extra ssh options. Empty by default.
# export HDFSPROXY_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HDFSPROXY_CONF_DIR"
# Where log files are stored. $HDFSPROXY_HOME/logs by default.
# export HDFSPROXY_LOG_DIR=${HDFSPROXY_HOME}/logs
# File naming remote slave hosts. $HDFSPROXY_HOME/conf/slaves by default.
# export HDFSPROXY_SLAVES=${HDFSPROXY_HOME}/conf/slaves
# host:path where hdfsproxy code should be rsync'd from. Unset by default.
# export HDFSPROXY_MASTER=master:/home/$USER/src/hdfsproxy
# Seconds to sleep between slave commands. Unset by default. This
# can be useful in large clusters, where, e.g., slave rsyncs can
# otherwise arrive faster than the master can service them.
# export HDFSPROXY_SLAVE_SLEEP=0.1
# The directory where pid files are stored. /tmp by default.
# export HDFSPROXY_PID_DIR=/var/hdfsproxy/pids
# A string representing this instance of hdfsproxy. $USER by default.
# export HDFSPROXY_IDENT_STRING=$USER
# The scheduling priority for daemon processes. See 'man nice'.
# export HDFSPROXY_NICENESS=10

@ -1,44 +0,0 @@
# Set HdfsProxy-specific environment variables here.
# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
# The java implementation to use. Required.
# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
# Extra Java CLASSPATH elements. Optional.
# export HDFSPROXY_CLASSPATH=
# The maximum amount of heap to use, in MB. Default is 1000.
# export HDFSPROXY_HEAPSIZE=2000
# Extra Java runtime options. Empty by default.
# export HDFSPROXY_OPTS=
# Extra ssh options. Empty by default.
# export HDFSPROXY_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HDFSPROXY_CONF_DIR"
# Where log files are stored. $HDFSPROXY_HOME/logs by default.
# export HDFSPROXY_LOG_DIR=${HDFSPROXY_HOME}/logs
# File naming remote slave hosts. $HDFSPROXY_HOME/conf/slaves by default.
# export HDFSPROXY_SLAVES=${HDFSPROXY_HOME}/conf/slaves
# host:path where hdfsproxy code should be rsync'd from. Unset by default.
# export HDFSPROXY_MASTER=master:/home/$USER/src/hdfsproxy
# Seconds to sleep between slave commands. Unset by default. This
# can be useful in large clusters, where, e.g., slave rsyncs can
# otherwise arrive faster than the master can service them.
# export HDFSPROXY_SLAVE_SLEEP=0.1
# The directory where pid files are stored. /tmp by default.
# export HDFSPROXY_PID_DIR=/var/hdfsproxy/pids
# A string representing this instance of hdfsproxy. $USER by default.
# export HDFSPROXY_IDENT_STRING=$USER
# The scheduling priority for daemon processes. See 'man nice'.
# export HDFSPROXY_NICENESS=10

@ -1 +0,0 @@
localhost

@ -1,61 +0,0 @@
# Define some default values that can be overridden by system properties
hdfsproxy.root.logger=INFO,console
hdfsproxy.log.dir=.
hdfsproxy.log.file=hdfsproxy.log
# Define the root logger to the system property "hdfsproxy.root.logger".
log4j.rootLogger=${hdfsproxy.root.logger}
# Logging Threshold
log4j.threshhold=ALL
#
# Daily Rolling File Appender
#
log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFA.File=${hdfsproxy.log.dir}/${hdfsproxy.log.file}
# Rollover at midnight
log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
# 30-day backup
#log4j.appender.DRFA.MaxBackupIndex=30
log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
# Pattern format: Date LogLevel LoggerName LogMessage
log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
# Debugging Pattern format
#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
#
# console
# Add "console" to rootlogger above if you want to use this
#
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
#
# Rolling File Appender
#
#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
#log4j.appender.RFA.File=${hdfsproxy.log.dir}/${hdfsproxy.log.file}
# Logfile size and and 30-day backups
#log4j.appender.RFA.MaxFileSize=1MB
#log4j.appender.RFA.MaxBackupIndex=30
#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
# Custom Logging levels
#log4j.logger.org.apache.hadoop.hdfsproxy.HttpsProxy=DEBUG
#log4j.logger.org.apache.hadoop.hdfsproxy.ProxyFilter=DEBUG
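
This is a standard log4j 1.x properties file; a class in the proxy picks it up simply by requesting a logger, and the root level can typically be overridden at launch, e.g. with -Dhdfsproxy.root.logger=DEBUG,console. A minimal usage sketch (the class name is hypothetical):

import org.apache.log4j.Logger;

public class HdfsProxyLoggingSketch {  // hypothetical class name
  // Messages are routed by the log4j.properties above: INFO and higher go to the
  // configured appenders, DEBUG only when the root logger level is lowered.
  private static final Logger LOG = Logger.getLogger(HdfsProxyLoggingSketch.class);

  public static void main(String[] args) {
    LOG.info("hdfsproxy logging initialized");
    LOG.debug("visible only when hdfsproxy.root.logger is set to a DEBUG level");
  }
}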

@ -1,26 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
This file defines the mappings from username to a comma-separated list
of certificate serial numbers that the user is allowed to use. One mapping
per user. Wildcard characters, such as "*" and "?", are not recognized.
Any leading or trailing whitespaces are stripped/ignored. Note that user
"Admin" is the special hdfsproxy admin user. To make a user an admin, add
the user's certificate serial number to user "Admin". Normal users cannot
have "Admin" as username. Usernames can only comprise of 0-9a-zA-Z and
underscore.
-->
<configuration>
<property>
<name>Admin</name>
<value></value>
<description> Special hdfsproxy admin user
</description>
</property>
</configuration>
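
Because each entry maps a username to a comma separated list of serial numbers with surrounding whitespace ignored, a consumer could parse an entry roughly as below. This is an illustrative sketch only; the user name "alice" and the parsing code are hypothetical, not hdfsproxy's implementation.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;

public class UserCertsSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    conf.addResource("user-certs.xml");

    // Hypothetical entry: <name>alice</name> <value> 1234, 5678 </value>
    String raw = conf.get("alice", "");
    List<String> serials = new ArrayList<String>();
    for (String token : raw.split(",")) {
      String serial = token.trim();      // leading/trailing whitespace is ignored
      if (!serial.isEmpty()) {
        serials.add(serial);
      }
    }
    System.out.println("certificate serial numbers allowed for alice: " + serials);
  }
}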

@ -1,28 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
This file defines the mappings from username to a comma-separated list
of directories/files that the user is allowed to use. One mapping
per user. Wildcard characters, such as "*" and "?", are not recognized.
For example, to match "/output" directory, one can use "/output" or
"/output/", but not "/output/*". Any leading or trailing whitespaces
in the name field are stripped/ignored, while only leading whitespaces
in the value field are. Note that the special hdfsproxy admin user "Admin"
doesn't automatically have access to any files, unless explicitly
specified in this file. Usernames may only contain 0-9a-zA-Z and
underscore.
-->
<configuration>
<property>
<name></name>
<value></value>
<description>
</description>
</property>
</configuration>

@ -1,47 +0,0 @@
#!/bin/sh
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
this="$0"
while [ -h "$this" ]; do
ls=`ls -ld "$this"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '.*/.*' > /dev/null; then
this="$link"
else
this=`dirname "$this"`/"$link"
fi
done
# convert relative path to absolute path
bin=`dirname "$this"`
script=`basename "$this"`
bin=`cd "$bin"; pwd`
this="$bin/$script"
# Check if HADOOP_HOME AND JAVA_HOME is set.
if [ -z $HADOOP_HOME ] ; then
echo "HADOOP_HOME environment variable not defined"
exit -1;
fi
if [ -z $JAVA_HOME ] ; then
echo "JAVA_HOME environment variable not defined"
exit -1;
fi
hadoopVersion=`$HADOOP_HOME/bin/hadoop version | awk 'BEGIN { RS = "" ; FS = "\n" } ; { print $1 }' | awk '{print $2}'`
$JAVA_HOME/bin/java -classpath $HADOOP_HOME/hadoop-${hadoopVersion}-core.jar:$HADOOP_HOME/contrib/vaidya/hadoop-${hadoopVersion}-vaidya.jar:$HADOOP_HOME/lib/commons-logging-1.0.4.jar:${CLASSPATH} org.apache.hadoop.vaidya.postexdiagnosis.PostExPerformanceDiagnoser $@

@ -1,104 +0,0 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**
-->
<!-- This is a diagnostic test configuration file. Diagnostic test driver
reads this file to get the list of tests and their configuration information
Title : Provides brief description of the test
ClassName : Provides the fully qualified java class name that implements the test condition
Description : Provides detailed information about the test describing how it checks for a specific
performance problem.
SuccessThreshold : (value between [0..1])
                 : Evaluating a diagnostic test returns its level of impact on job
                   performance. If the impact value (between 0 and 1) is equal to or
                   greater than the success threshold, the rule has detected the problem
                   (TEST POSITIVE); otherwise the rule has passed the test (TEST NEGATIVE).
                   The impact level is calculated and returned by each test's evaluate
                   method. For tests that are boolean in nature, the impact level is
                   either 0 or 1 and the success threshold should be 1.
Importance : Indicates relative importance of this diagnostic test among the set of
diagnostic rules defined in this file. Three declarative values that
can be assigned are High, Medium or Low
Prescription : This is an optional element to store the advice to be included in the report upon test failure
This is overwritten in the report by any advice/prescription text returned by getPrescription method of
DiagnosticTest.
InputElement : Input element is made available to the diagnostic test for it to interpret and accept
any parameters specific to the test. These test specific parameters are used to configure
the tests without changing the java code.
-->
<PostExPerformanceDiagnosisTests>
<DiagnosticTest>
<Title><![CDATA[Balanced Reduce Partitioning]]></Title>
<ClassName><![CDATA[org.apache.hadoop.vaidya.postexdiagnosis.tests.BalancedReducePartitioning]]></ClassName>
<Description><![CDATA[This rule tests how well the input to the reduce tasks is balanced]]></Description>
<Importance><![CDATA[High]]></Importance>
<SuccessThreshold><![CDATA[0.20]]></SuccessThreshold>
<Prescription><![CDATA[advice]]></Prescription>
<InputElement>
<PercentReduceRecords><![CDATA[0.85]]></PercentReduceRecords>
</InputElement>
</DiagnosticTest>
<DiagnosticTest>
<Title><![CDATA[Impact of Map tasks Re-Execution]]></Title>
<ClassName><![CDATA[org.apache.hadoop.vaidya.postexdiagnosis.tests.MapsReExecutionImpact]]></ClassName>
<Description><![CDATA[This test rule checks percentage of map task re-execution impacting the job performance]]></Description>
<Importance><![CDATA[High]]></Importance>
<SuccessThreshold><![CDATA[0.40]]></SuccessThreshold>
<Prescription><![CDATA[default advice]]></Prescription>
<InputElement>
</InputElement>
</DiagnosticTest>
<DiagnosticTest>
<Title><![CDATA[Impact of Reduce tasks Re-Execution]]></Title>
<ClassName><![CDATA[org.apache.hadoop.vaidya.postexdiagnosis.tests.ReducesReExecutionImpact]]></ClassName>
<Description><![CDATA[This test rule checks percentage of reduce task re-execution impacting the job performance]]></Description>
<Importance><![CDATA[High]]></Importance>
<SuccessThreshold><![CDATA[0.40]]></SuccessThreshold>
<Prescription><![CDATA[default advice]]></Prescription>
<InputElement>
</InputElement>
</DiagnosticTest>
<DiagnosticTest>
<Title><![CDATA[Map and/or Reduce tasks reading HDFS data as a side effect]]></Title>
<ClassName><![CDATA[org.apache.hadoop.vaidya.postexdiagnosis.tests.ReadingHDFSFilesAsSideEffect]]></ClassName>
<Description><![CDATA[This test rule checks if map/reduce tasks are reading data from HDFS as a side effect. The more data that is read as a side effect, the more likely it is to become a bottleneck across the parallel execution of map/reduce tasks.]]></Description>
<Importance><![CDATA[High]]></Importance>
<SuccessThreshold><![CDATA[0.05]]></SuccessThreshold>
<Prescription><![CDATA[default advice]]></Prescription>
<InputElement>
</InputElement>
</DiagnosticTest>
<DiagnosticTest>
<Title><![CDATA[Map side disk spill]]></Title>
<ClassName><![CDATA[org.apache.hadoop.vaidya.postexdiagnosis.tests.MapSideDiskSpill]]></ClassName>
<Description><![CDATA[This test rule checks if Map tasks are spilling data onto the local disk during the map-side sort due to an insufficient sort buffer size. The impact is calculated as the ratio of local bytes written to map output bytes. The impact is normalized using the NormalizationFactor given below; any value greater than or equal to the normalization factor is treated as the maximum (i.e. 1). ]]></Description>
<Importance><![CDATA[Low]]></Importance>
<SuccessThreshold><![CDATA[0.3]]></SuccessThreshold>
<Prescription><![CDATA[default advice]]></Prescription>
<InputElement>
<NormalizationFactor>3.0</NormalizationFactor>
</InputElement>
</DiagnosticTest>
</PostExPerformanceDiagnosisTests>
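
To make the SuccessThreshold semantics above concrete, here is a small standalone sketch (not the actual Vaidya test class) of the arithmetic described for the "Map side disk spill" rule: the impact is the ratio of local bytes written to map output bytes, divided by the NormalizationFactor and capped at 1, and the rule reports TEST POSITIVE when the impact reaches the success threshold. The counter values are assumed purely for illustration.

public class MapSideDiskSpillSketch {

  // Impact in [0..1]: ratio of local bytes written to map output bytes,
  // divided by the normalization factor and capped at 1.
  static double impact(long localBytesWritten, long mapOutputBytes,
                       double normalizationFactor) {
    if (mapOutputBytes <= 0) {
      return 0.0;
    }
    double ratio = (double) localBytesWritten / (double) mapOutputBytes;
    return Math.min(1.0, ratio / normalizationFactor);
  }

  public static void main(String[] args) {
    double successThreshold = 0.3;     // from the DiagnosticTest element above
    double normalizationFactor = 3.0;  // from the InputElement above

    // Assumed counter values, for illustration only.
    double level = impact(2500000000L, 2000000000L, normalizationFactor);

    // TEST POSITIVE when the impact reaches the success threshold.
    boolean testPositive = level >= successThreshold;
    System.out.printf("impact=%.2f positive=%b%n", level, testPositive);
  }
}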

Binary data: core/lib/hadoop-0.20.0/lib/.DS_Store (vendored)

Binary data: core/lib/hadoop-0.20.0/lib/native/.DS_Store (vendored)

Binary data: core/lib/hadoop-0.20.0/librecordio/librecordio.a

@ -1,40 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<web-app xmlns="http://java.sun.com/xml/ns/javaee"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd"
version="2.5">
<servlet>
<servlet-name>org.apache.hadoop.hdfs.server.datanode.browseDirectory_jsp</servlet-name>
<servlet-class>org.apache.hadoop.hdfs.server.datanode.browseDirectory_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.hdfs.server.datanode.tail_jsp</servlet-name>
<servlet-class>org.apache.hadoop.hdfs.server.datanode.tail_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.hdfs.server.datanode.browseBlock_jsp</servlet-name>
<servlet-class>org.apache.hadoop.hdfs.server.datanode.browseBlock_jsp</servlet-class>
</servlet>
<servlet-mapping>
<servlet-name>org.apache.hadoop.hdfs.server.datanode.browseDirectory_jsp</servlet-name>
<url-pattern>/browseDirectory.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.hdfs.server.datanode.tail_jsp</servlet-name>
<url-pattern>/tail.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.hdfs.server.datanode.browseBlock_jsp</servlet-name>
<url-pattern>/browseBlock.jsp</url-pattern>
</servlet-mapping>
</web-app>

@ -1,40 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<web-app xmlns="http://java.sun.com/xml/ns/javaee"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd"
version="2.5">
<servlet>
<servlet-name>org.apache.hadoop.hdfs.server.namenode.nn_005fbrowsedfscontent_jsp</servlet-name>
<servlet-class>org.apache.hadoop.hdfs.server.namenode.nn_005fbrowsedfscontent_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.hdfs.server.namenode.dfsnodelist_jsp</servlet-name>
<servlet-class>org.apache.hadoop.hdfs.server.namenode.dfsnodelist_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.hdfs.server.namenode.dfshealth_jsp</servlet-name>
<servlet-class>org.apache.hadoop.hdfs.server.namenode.dfshealth_jsp</servlet-class>
</servlet>
<servlet-mapping>
<servlet-name>org.apache.hadoop.hdfs.server.namenode.nn_005fbrowsedfscontent_jsp</servlet-name>
<url-pattern>/nn_browsedfscontent.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.hdfs.server.namenode.dfsnodelist_jsp</servlet-name>
<url-pattern>/dfsnodelist.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.hdfs.server.namenode.dfshealth_jsp</servlet-name>
<url-pattern>/dfshealth.jsp</url-pattern>
</servlet-mapping>
</web-app>

@ -1,20 +0,0 @@
<meta HTTP-EQUIV="REFRESH" content="0;url=dfshealth.jsp"/>
<html>
<head>
<title>Hadoop Administration</title>
</head>
<body>
<h1>Hadoop Administration</h1>
<ul>
<li><a href="dfshealth.jsp">DFS Health/Status</a></li>
</ul>
</body>
</html>

@ -1,180 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<web-app xmlns="http://java.sun.com/xml/ns/javaee"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd"
version="2.5">
<servlet>
<servlet-name>org.apache.hadoop.mapred.jobqueue_005fdetails_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.jobqueue_005fdetails_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.jobtracker_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.jobtracker_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.machines_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.machines_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.taskdetailshistory_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.taskdetailshistory_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.jobhistory_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.jobhistory_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.jobconf_005fhistory_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.jobconf_005fhistory_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.loadhistory_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.loadhistory_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.jobdetailshistory_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.jobdetailshistory_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.jobtaskshistory_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.jobtaskshistory_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.jobfailures_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.jobfailures_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.taskdetails_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.taskdetails_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.analysejobhistory_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.analysejobhistory_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.jobblacklistedtrackers_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.jobblacklistedtrackers_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.jobdetails_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.jobdetails_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.jobtasks_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.jobtasks_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.jobconf_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.jobconf_jsp</servlet-class>
</servlet>
<servlet>
<servlet-name>org.apache.hadoop.mapred.taskstats_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.taskstats_jsp</servlet-class>
</servlet>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.jobqueue_005fdetails_jsp</servlet-name>
<url-pattern>/jobqueue_details.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.jobtracker_jsp</servlet-name>
<url-pattern>/jobtracker.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.machines_jsp</servlet-name>
<url-pattern>/machines.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.taskdetailshistory_jsp</servlet-name>
<url-pattern>/taskdetailshistory.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.jobhistory_jsp</servlet-name>
<url-pattern>/jobhistory.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.jobconf_005fhistory_jsp</servlet-name>
<url-pattern>/jobconf_history.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.loadhistory_jsp</servlet-name>
<url-pattern>/loadhistory.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.jobdetailshistory_jsp</servlet-name>
<url-pattern>/jobdetailshistory.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.jobtaskshistory_jsp</servlet-name>
<url-pattern>/jobtaskshistory.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.jobfailures_jsp</servlet-name>
<url-pattern>/jobfailures.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.taskdetails_jsp</servlet-name>
<url-pattern>/taskdetails.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.analysejobhistory_jsp</servlet-name>
<url-pattern>/analysejobhistory.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.jobblacklistedtrackers_jsp</servlet-name>
<url-pattern>/jobblacklistedtrackers.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.jobdetails_jsp</servlet-name>
<url-pattern>/jobdetails.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.jobtasks_jsp</servlet-name>
<url-pattern>/jobtasks.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.jobconf_jsp</servlet-name>
<url-pattern>/jobconf.jsp</url-pattern>
</servlet-mapping>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.taskstats_jsp</servlet-name>
<url-pattern>/taskstats.jsp</url-pattern>
</servlet-mapping>
</web-app>

@ -1,20 +0,0 @@
<meta HTTP-EQUIV="REFRESH" content="0;url=jobtracker.jsp"/>
<html>
<head>
<title>Hadoop Administration</title>
</head>
<body>
<h1>Hadoop Administration</h1>
<ul>
<li><a href="jobtracker.jsp">JobTracker</a></li>
</ul>
</body>
</html>

@ -1,134 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
body {
background-color : #ffffff;
font-family : sans-serif;
}
.small {
font-size : smaller;
}
div#dfsnodetable tr#row1, div#dfstable td#col1 {
font-weight : bolder;
}
div#dfstable td#col1 {
vertical-align : top;
}
div#dfstable td#col3 {
text-align : right;
}
div#dfsnodetable caption {
text-align : left;
}
div#dfsnodetable a#title {
font-size : larger;
font-weight : bolder;
}
div#dfsnodetable td, th {
border-bottom-style : none;
padding-bottom : 4px;
padding-top : 4px;
}
div#dfsnodetable A:link, A:visited {
text-decoration : none;
}
div#dfsnodetable th.header, th.headerASC, th.headerDSC {
padding-bottom : 8px;
padding-top : 8px;
}
div#dfsnodetable th.header:hover, th.headerASC:hover, th.headerDSC:hover,
td.name:hover {
text-decoration : underline;
cursor : pointer;
}
div#dfsnodetable td.blocks, td.size, td.pcused, td.adminstate, td.lastcontact {
text-align : right;
}
div#dfsnodetable .rowNormal .header {
background-color : #ffffff;
}
div#dfsnodetable .rowAlt, .headerASC, .headerDSC {
background-color : lightyellow;
}
.warning {
font-weight : bolder;
color : red;
}
div#dfstable table {
white-space : pre;
}
div#dfsnodetable td, div#dfsnodetable th, div#dfstable td {
padding-left : 10px;
padding-right : 10px;
}
td.perc_filled {
background-color:#AAAAFF;
}
td.perc_nonfilled {
background-color:#FFFFFF;
}
line.taskgraphline {
stroke-width:1;stroke-linecap:round;
}
#quicklinks {
margin: 0;
padding: 2px 4px;
position: fixed;
top: 0;
right: 0;
text-align: right;
background-color: #eee;
font-weight: bold;
}
#quicklinks ul {
margin: 0;
padding: 0;
list-style-type: none;
font-weight: normal;
}
#quicklinks ul {
display: none;
}
#quicklinks a {
font-size: smaller;
text-decoration: none;
}
#quicklinks ul a {
text-decoration: underline;
}

@ -1,18 +0,0 @@
<?xml version="1.0"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="html"/>
<xsl:template match="configuration">
<table border="1" align="center" >
<tr>
<th>name</th>
<th>value</th>
</tr>
<xsl:for-each select="property">
<tr>
<td width="35%"><b><xsl:value-of select="name"/></b></td>
<td width="65%"><xsl:value-of select="value"/></td>
</tr>
</xsl:for-each>
</table>
</xsl:template>
</xsl:stylesheet>

@ -1,151 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
function checkButtonVerbage()
{
var inputs = document.getElementsByName("jobCheckBox");
var check = getCheckStatus(inputs);
setCheckButtonVerbage(! check);
}
function selectAll()
{
var inputs = document.getElementsByName("jobCheckBox");
var check = getCheckStatus(inputs);
for (var i in inputs) {
if ('jobCheckBox' == inputs[i].name) {
if ( inputs[i].parentNode.parentNode.style.display != 'none') {
inputs[i].checked = ! check;
}
}
}
setCheckButtonVerbage(check);
}
function getCheckStatus(inputs)
{
var check = true;
for (var i in inputs) {
if ('jobCheckBox' == inputs[i].name) {
if ( inputs[i].parentNode.parentNode.style.display != 'none') {
check = (inputs[i].checked && check);
}
}
}
return check;
}
function setCheckButtonVerbage(check)
{
var op = document.getElementById("checkEm");
op.value = check ? "Select All" : "Deselect All";
}
function applyfilter()
{
var cols = ["job","priority","user","name"];
var nodes = [];
var filters = [];
for (var i = 0; i < cols.length; ++i) {
nodes[i] = document.getElementById(cols[i] + "_0" );
}
var filter = document.getElementById("filter");
filters = filter.value.split(' ');
var row = 0;
while ( nodes[0] != null ) {
//default display status
var display = true;
// for each filter
for (var filter_idx = 0; filter_idx < filters.length; ++filter_idx) {
// go check each column
if ((getDisplayStatus(nodes, filters[filter_idx], cols)) == 0) {
display = false;
break;
}
}
// set the display status
nodes[0].parentNode.style.display = display ? '' : 'none';
// next row
++row;
// next set of controls
for (var i = 0; i < cols.length; ++i) {
nodes[i] = document.getElementById(cols[i] + "_" + row);
}
} // while
}
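// Filters are space separated terms: a plain term is matched against every
// column, while the "column:term" form (e.g. "user:smith") restricts the
// match to that single column of the job table.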
function getDisplayStatus(nodes, filter, cols)
{
var offset = filter.indexOf(':');
var search = offset != -1 ? filter.substring(offset + 1).toLowerCase() : filter.toLowerCase();
for (var col = 0; col < cols.length; ++col) {
// a column specific filter
if (offset != -1 ) {
var searchCol = filter.substring(0, offset).toLowerCase();
if (searchCol == cols[col]) {
// special case jobs to remove unnecessary stuff
return containsIgnoreCase(stripHtml(nodes[col].innerHTML), search);
}
} else if (containsIgnoreCase(stripHtml(nodes[col].innerHTML), filter)) {
return true;
}
}
return false;
}
function stripHtml(text)
{
return text.replace(/<[^>]*>/g,'').replace(/&[^;]*;/g,'');
}
function containsIgnoreCase(haystack, needle)
{
return haystack.toLowerCase().indexOf(needle.toLowerCase()) != -1;
}
function confirmAction()
{
return confirm("Are you sure?");
}
function toggle(id)
{
if ( document.getElementById(id).style.display != 'block') {
document.getElementById(id).style.display = 'block';
}
else {
document.getElementById(id).style.display = 'none';
}
}

@ -1,20 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<web-app xmlns="http://java.sun.com/xml/ns/javaee"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd"
version="2.5">
<servlet>
<servlet-name>org.apache.hadoop.mapred.tasktracker_jsp</servlet-name>
<servlet-class>org.apache.hadoop.mapred.tasktracker_jsp</servlet-class>
</servlet>
<servlet-mapping>
<servlet-name>org.apache.hadoop.mapred.tasktracker_jsp</servlet-name>
<url-pattern>/tasktracker.jsp</url-pattern>
</servlet-mapping>
</web-app>

@ -1 +0,0 @@
<meta HTTP-EQUIV="REFRESH" content="0;url=tasktracker.jsp"/>

@ -1,5 +1,387 @@
Hadoop Change Log
Release 0.20.3 - Unreleased
Release 0.20.2 - 2010-2-19
NEW FEATURES
HADOOP-6218. Adds a feature where TFile can be split by Record
Sequence number. (Hong Tang and Raghu Angadi via ddas)
BUG FIXES
MAPREDUCE-112. Add counters for reduce input, output records to the new API.
(Jothi Padmanabhan via cdouglas)
HADOOP-6231. Allow caching of filesystem instances to be disabled on a
per-instance basis (Tom White and Ben Slusky via mahadev)
MAPREDUCE-826. harchive doesn't use ToolRunner / harchive returns 0 even
if the job fails with exception (koji via mahadev)
MAPREDUCE-979. Fixed JobConf APIs related to memory parameters to return
values of new configuration variables when deprecated variables are
disabled. (Sreekanth Ramakrishnan via yhemanth)
HDFS-686. NullPointerException is thrown while merging edit log and image.
(hairong)
HDFS-677. Rename failure when both source and destination quota exceeds
results in deletion of source. (suresh)
HDFS-709. Fix TestDFSShell failure due to rename bug introduced by
HDFS-677. (suresh)
HDFS-579. Fix DfsTask to follow the semantics of 0.19, regarding non-zero
return values as failures. (Christian Kunz via cdouglas)
MAPREDUCE-1070. Prevent a deadlock in the fair scheduler servlet.
(Todd Lipcon via cdouglas)
HADOOP-5759. Fix for IllegalArgumentException when CombineFileInputFormat
is used as job InputFormat. (Amareshwari Sriramadasu via zshao)
HADOOP-6097. Fix Path conversion in makeQualified and reset LineReader byte
count at the start of each block in Hadoop archives. (Ben Slusky, Tom
White, and Mahadev Konar via cdouglas)
HDFS-723. Fix deadlock in DFSClient#DFSOutputStream. (hairong)
HDFS-732. DFSClient.DFSOutputStream.close() should throw an exception if
the stream cannot be closed successfully. (szetszwo)
MAPREDUCE-1163. Remove unused, hard-coded paths from libhdfs. (Allen
Wittenauer via cdouglas)
HDFS-761. Fix failure to process rename operation from edits log due to
quota verification. (suresh)
MAPREDUCE-623. Resolve javac warnings in mapreduce. (Jothi Padmanabhan
via sharad)
HADOOP-6575. Remove call to fault injection tests not present in 0.20.
(cdouglas)
HADOOP-6576. Fix streaming test failures on 0.20. (Todd Lipcon via cdouglas)
IMPROVEMENTS
HADOOP-5611. Fix C++ libraries to build on Debian Lenny. (Todd Lipcon
via tomwhite)
MAPREDUCE-1068. Fix streaming job to show proper message if file
is not present. (Amareshwari Sriramadasu via sharad)
HDFS-596. Fix memory leak in hdfsFreeFileInfo() for libhdfs.
(Zhang Bingjun via dhruba)
MAPREDUCE-1147. Add map output counters to new API. (Amar Kamat via
cdouglas)
HADOOP-6269. Fix threading issue with defaultResource in Configuration.
(Sreekanth Ramakrishnan via cdouglas)
MAPREDUCE-1182. Fix overflow in reduce causing allocations to exceed the
configured threshold. (cdouglas)
HADOOP-6386. NameNode's HttpServer can't instantiate InetSocketAddress:
IllegalArgumentException is thrown. (cos)
HDFS-185. Disallow chown, chgrp, chmod, setQuota, and setSpaceQuota when
name-node is in safemode. (Ravi Phulari via shv)
HADOOP-6428. HttpServer sleeps with negative values (cos)
HADOOP-5623. Fixes a problem to do with status messages getting overwritten
in streaming jobs. (Rick Cox and Jothi Padmanabhan via tomwhite)
HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
GzipCodec. (Aaron Kimball via cdouglas)
HDFS-187. Initialize secondary namenode http address in TestStartup.
(Todd Lipcon via szetszwo)
MAPREDUCE-433. Use more reliable counters in TestReduceFetch. (cdouglas)
HDFS-792. DFSClient 0.20.1 is incompatible with HDFS 0.20.2.
(Todd Lipcon via hairong)
HADOOP-6498. IPC client bug may cause rpc call hang. (Ruyue Ma and
hairong via hairong)
HADOOP-6596. Failing tests prevent the rest of test targets from
execution. (cos)
HADOOP-6524. Contrib tests are failing Clover'ed build. (cos)
HDFS-919. Create test to validate the BlocksVerified metric (Gary Murry
via cos)
HDFS-907. Add tests for getBlockLocations and totalLoad metrics.
(Ravi Phulari via cos)
MAPREDUCE-1251. c++ utils doesn't compile. (Eli Collins via tomwhite)
HADOOP-5612. Some c++ scripts are not chmodded before ant execution.
(Todd Lipcon via tomwhite)
Release 0.20.1 - 2009-09-01
INCOMPATIBLE CHANGES
HADOOP-5726. Remove pre-emption from capacity scheduler code base.
(Rahul Kumar Singh via yhemanth)
HADOOP-5881. Simplify memory monitoring and scheduling related
configuration. (Vinod Kumar Vavilapalli via yhemanth)
NEW FEATURES
HADOOP-6080. Introduce -skipTrash option to rm and rmr.
(Jakob Homan via shv)
HADOOP-3315. Add a new, binary file format, TFile. (Hong Tang via cdouglas)
IMPROVEMENTS
HADOOP-5711. Change Namenode file close log to info. (szetszwo)
HADOOP-5736. Update the capacity scheduler documentation for features
like memory based scheduling, job initialization and removal of pre-emption.
(Sreekanth Ramakrishnan via yhemanth)
HADOOP-4674. Fix fs help messages for -test, -text, -tail, -stat
and -touchz options. (Ravi Phulari via szetszwo)
HADOOP-4372. Improves the way history filenames are obtained and manipulated.
(Amar Kamat via ddas)
HADOOP-5897. Add name-node metrics to capture java heap usage.
(Suresh Srinivas via shv)
HDFS-438. Improve help message for space quota command. (Raghu Angadi)
MAPREDUCE-767. Remove the dependence on the CLI 2.0 snapshot.
(Amar Kamat via ddas)
OPTIMIZATIONS
BUG FIXES
HADOOP-5691. Makes org.apache.hadoop.mapreduce.Reducer concrete class
instead of abstract. (Amareshwari Sriramadasu via sharad)
HADOOP-5646. Fixes a problem in TestQueueCapacities.
(Vinod Kumar Vavilapalli via ddas)
HADOOP-5655. TestMRServerPorts fails on java.net.BindException. (Devaraj
Das via hairong)
HADOOP-5654. TestReplicationPolicy.<init> fails on java.net.BindException.
(hairong)
HADOOP-5688. Fix HftpFileSystem checksum path construction. (Tsz Wo
(Nicholas) Sze via cdouglas)
HADOOP-5213. Fix Null pointer exception caused when bzip2compression
was used and user closed a output stream without writing any data.
(Zheng Shao via dhruba)
HADOOP-5718. Remove the check for the default queue in capacity scheduler.
(Sreekanth Ramakrishnan via yhemanth)
HADOOP-5719. Remove jobs that failed initialization from the waiting queue
in the capacity scheduler. (Sreekanth Ramakrishnan via yhemanth)
HADOOP-4744. Attaching another fix to the jetty port issue. The TaskTracker
kills itself if it ever discovers that the port to which jetty is actually
bound is invalid (-1). (ddas)
HADOOP-5349. Fixes a problem in LocalDirAllocator to check for the return
path value that is returned for the case where the file we want to write
is of an unknown size. (Vinod Kumar Vavilapalli via ddas)
HADOOP-5636. Prevents a job from going to RUNNING state after it has been
KILLED (this used to happen when the SetupTask would come back with a
success after the job has been killed). (Amar Kamat via ddas)
HADOOP-5641. Fix a NullPointerException in capacity scheduler's memory
based scheduling code when jobs get retired. (yhemanth)
HADOOP-5828. Use absolute path for mapred.local.dir of JobTracker in
MiniMRCluster. (yhemanth)
HADOOP-4981. Fix capacity scheduler to schedule speculative tasks
correctly in the presence of High RAM jobs.
(Sreekanth Ramakrishnan via yhemanth)
HADOOP-5210. Solves a problem in the progress report of the reduce task.
(Ravi Gummadi via ddas)
HADOOP-5850. Fixes a problem to do with not being able to run jobs with
0 maps/reduces. (Vinod K V via ddas)
HADOOP-5728. Fixed FSEditLog.printStatistics IndexOutOfBoundsException.
(Wang Xu via johan)
HADOOP-4626. Correct the API links in hdfs forrest doc so that they
point to the same version of hadoop. (szetszwo)
HADOOP-5883. Fixed tasktracker memory monitoring to account for
momentary spurts in memory usage due to java's fork() model.
(yhemanth)
HADOOP-5539. Fixes a problem to do with not preserving intermediate
output compression for merged data.
(Jothi Padmanabhan and Billy Pearson via ddas)
HADOOP-5932. Fixes a problem in capacity scheduler in computing
available memory on a tasktracker.
(Vinod Kumar Vavilapalli via yhemanth)
HADOOP-5648. Fixes a build issue in not being able to generate gridmix.jar
in hadoop binary tarball. (Giridharan Kesavan via gkesavan)
HADOOP-5908. Fixes a problem to do with ArithmeticException in the
JobTracker when there are jobs with 0 maps. (Amar Kamat via ddas)
HADOOP-5924. Fixes a corner case problem to do with job recovery with
empty history files. Also, after a JT restart, sends KillTaskAction to
tasks that report back but the corresponding job hasn't been initialized
yet. (Amar Kamat via ddas)
HADOOP-5882. Fixes a reducer progress update problem for new mapreduce
api. (Amareshwari Sriramadasu via sharad)
HADOOP-5746. Fixes a corner case problem in Streaming, where if an
exception happens in MROutputThread after the last call to the map/reduce
method, the exception goes undetected. (Amar Kamat via ddas)
HADOOP-5884. Fixes accounting in capacity scheduler so that high RAM jobs
take more slots. (Vinod Kumar Vavilapalli via yhemanth)
HADOOP-5937. Correct a safemode message in FSNamesystem. (Ravi Phulari
via szetszwo)
HADOOP-5869. Fix bug in assignment of setup / cleanup task that was
causing TestQueueCapacities to fail.
(Sreekanth Ramakrishnan via yhemanth)
HADOOP-5921. Fixes a problem in the JobTracker where it sometimes never
used to come up due to a system file creation on JobTracker's system-dir
failing. This problem would sometimes show up only when the FS for the
system-dir (usually HDFS) is started at nearly the same time as the
JobTracker. (Amar Kamat via ddas)
HADOOP-5920. Fixes a testcase failure for TestJobHistory.
(Amar Kamat via ddas)
HDFS-26. Better error message to users when commands fail because of
lack of quota. Allow quota to be set even if the limit is lower than
current consumption. (Boris Shkolnik via rangadi)
MAPREDUCE-2. Fixes a bug in KeyFieldBasedPartitioner in handling empty
keys. (Amar Kamat via sharad)
MAPREDUCE-130. Delete the jobconf copy from the log directory of the
JobTracker when the job is retired. (Amar Kamat via sharad)
MAPREDUCE-657. Fix hardcoded filesystem problem in CompletedJobStatusStore.
(Amar Kamat via sharad)
MAPREDUCE-179. Update progress in new RecordReaders. (cdouglas)
MAPREDUCE-124. Fix a bug in failure handling of abort task of
OutputCommiter. (Amareshwari Sriramadasu via sharad)
HADOOP-6139. Fix the FsShell help messages for rm and rmr. (Jakob Homan
via szetszwo)
HADOOP-6141. Fix a few bugs in 0.20 test-patch.sh. (Hong Tang via
szetszwo)
HADOOP-6145. Fix FsShell rm/rmr error messages when there is a FNFE.
(Jakob Homan via szetszwo)
MAPREDUCE-565. Fix partitioner to work with new API. (Owen O'Malley via
cdouglas)
MAPREDUCE-465. Fix a bug in MultithreadedMapRunner. (Amareshwari
Sriramadasu via sharad)
MAPREDUCE-18. Puts some checks to detect cases where jetty serves up
incorrect output during shuffle. (Ravi Gummadi via ddas)
MAPREDUCE-735. Fixes a problem in the KeyFieldHelper to do with
the end index for some inputs (Amar Kamat via ddas)
HADOOP-6150. Users should be able to instantiate comparator using TFile
API. (Hong Tang via rangadi)
MAPREDUCE-383. Fix a bug in Pipes combiner due to bytes count not
getting reset after the spill. (Christian Kunz via sharad)
MAPREDUCE-40. Keep memory management backwards compatible for job
configuration parameters and limits. (Rahul Kumar Singh via yhemanth)
MAPREDUCE-796. Fixes a ClassCastException in an exception log in
MultiThreadedMapRunner. (Amar Kamat via ddas)
MAPREDUCE-838. Fixes a problem in the way commit of task outputs
happens. The bug was that even if commit failed, the task would
be declared as successful. (Amareshwari Sriramadasu via ddas)
MAPREDUCE-805. Fixes some deadlocks in the JobTracker due to the fact
the JobTracker lock hierarchy wasn't maintained in some JobInProgress
method calls. (Amar Kamat via ddas)
HDFS-167. Fix a bug in DFSClient that caused infinite retries on write.
(Bill Zeller via szetszwo)
HDFS-527. Remove unnecessary DFSClient constructors. (szetszwo)
MAPREDUCE-832. Reduce number of warning messages printed when
deprecated memory variables are used. (Rahul Kumar Singh via yhemanth)
MAPREDUCE-745. Fixes a testcase problem to do with generation of JobTracker
IDs. (Amar Kamat via ddas)
MAPREDUCE-834. Enables memory management on tasktrackers when old
memory management parameters are used in configuration.
(Sreekanth Ramakrishnan via yhemanth)
MAPREDUCE-818. Fixes Counters#getGroup API. (Amareshwari Sriramadasu
via sharad)
MAPREDUCE-807. Handles the AccessControlException during the deletion of
mapred.system.dir in the JobTracker. The JobTracker will bail out if it
encounters such an exception. (Amar Kamat via ddas)
HADOOP-6213. Remove commons dependency on commons-cli2. (Amar Kamat via
sharad)
MAPREDUCE-430. Fix a bug related to task getting stuck in case of
OOM error. (Amar Kamat via ddas)
HADOOP-6215. fix GenericOptionParser to deal with -D with '=' in the
value. (Amar Kamat via sharad)
MAPREDUCE-421. Fix Pipes to use returned system exit code.
(Christian Kunz via omalley)
HDFS-525. The SimpleDateFormat object in ListPathsServlet is not thread
safe. (Suresh Srinivas and cdouglas)
MAPREDUCE-911. Fix a bug in TestTaskFail related to speculative
execution. (Amareshwari Sriramadasu via sharad)
MAPREDUCE-687. Fix an assertion in TestMiniMRMapRedDebugScript.
(Amareshwari Sriramadasu via sharad)
MAPREDUCE-924. Fixes the TestPipes testcase to use Tool.
(Amareshwari Sriramadasu via sharad)
Release 0.20.0 - 2009-04-15
INCOMPATIBLE CHANGES
@ -361,6 +743,9 @@ Release 0.20.0 - 2009-04-15
HADOOP-5521. Removes dependency of TestJobInProgress on RESTART_COUNT
JobHistory tag. (Ravi Gummadi via ddas)
HADOOP-5714. Add a metric for NameNode getFileInfo operation. (Jakob Homan
via szetszwo)
OPTIMIZATIONS
HADOOP-3293. Fixes FileInputFormat to provide locations for splits
@ -945,6 +1330,18 @@ Release 0.19.2 - Unreleased
HADOOP-5551. Prevent directory destruction on file create.
(Brian Bockelman via shv)
HADOOP-5671. Fix FNF exceptions when copying from old versions of
HftpFileSystem. (Tsz Wo (Nicholas), SZE via cdouglas)
HADOOP-5579. Set errno correctly in libhdfs for permission, quota, and FNF
conditions. (Brian Bockelman via cdouglas)
HADOOP-5816. Fixes a problem in the KeyFieldBasedComparator to do with
ArrayIndexOutOfBounds exception. (He Yongqiang via ddas)
HADOOP-5951. Add Apache license header to StorageInfo.java. (Suresh
Srinivas via szetszwo)
Release 0.19.1 - 2009-02-23
IMPROVEMENTS
@ -2035,6 +2432,12 @@ Release 0.18.4 - Unreleased
HADOOP-5557. Fixes some minor problems in TestOverReplicatedBlocks.
(szetszwo)
HADOOP-5644. Namenode is stuck in safe mode. (suresh Srinivas via hairong)
HADOOP-6017. Lease Manager in NameNode does not handle certain characters
in filenames. This results in fatal errors in Secondary NameNode and while
restarting NameNode. (Tsz Wo (Nicholas), SZE via rangadi)
Release 0.18.3 - 2009-01-27
IMPROVEMENTS

@ -27,7 +27,7 @@
<property name="Name" value="Hadoop"/>
<property name="name" value="hadoop"/>
<property name="version" value="0.20.1-dev"/>
<property name="version" value="0.20.3-dev"/>
<property name="final.name" value="${name}-${version}"/>
<property name="year" value="2009"/>
@ -137,7 +137,7 @@
<property name="jdiff.build.dir" value="${build.docs}/jdiff"/>
<property name="jdiff.xml.dir" value="${lib.dir}/jdiff"/>
<property name="jdiff.stable" value="0.19.1"/>
<property name="jdiff.stable" value="0.19.2"/>
<property name="jdiff.stable.javadoc"
value="http://hadoop.apache.org/core/docs/r${jdiff.stable}/api/"/>
@ -704,6 +704,7 @@
<!-- ================================================================== -->
<target name="test-core" depends="jar-test" description="Run core unit tests">
<delete file="${test.build.dir}/testsfailed"/>
<delete dir="${test.build.data}"/>
<mkdir dir="${test.build.data}"/>
<delete dir="${test.log.dir}"/>
@ -728,6 +729,10 @@
<sysproperty key="java.library.path"
value="${build.native}/lib:${lib.dir}/native/${build.platform}"/>
<sysproperty key="install.c++.examples" value="${install.c++.examples}"/>
<!-- set io.compression.codec.lzo.class in the child jvm only if it is set -->
<syspropertyset dynamic="no">
<propertyref name="io.compression.codec.lzo.class"/>
</syspropertyset>
<!-- set compile.c++ in the child jvm only if it is set -->
<syspropertyset dynamic="no">
<propertyref name="compile.c++"/>
@ -743,18 +748,30 @@
<fileset dir="${test.src.dir}" includes="**/${testcase}.java"/>
</batchtest>
</junit>
<fail if="tests.failed">Tests failed!</fail>
<antcall target="checkfailure"/>
</target>
<target name="checkfailure" if="tests.failed">
<touch file="${test.build.dir}/testsfailed"/>
<fail unless="continueOnFailure">Tests failed!</fail>
</target>
<target name="test-contrib" depends="compile, compile-core-test" description="Run contrib unit tests">
<subant target="test">
<property name="version" value="${version}"/>
<property name="clover.jar" value="${clover.jar}"/>
<fileset file="${contrib.dir}/build.xml"/>
</subant>
</target>
<target name="test" depends="test-core, test-contrib" description="Run core, contrib unit tests">
</target>
<target name="test" description="Run core, contrib tests">
<delete file="${test.build.dir}/testsfailed"/>
<property name="continueOnFailure" value="true"/>
<antcall target="test-core"/>
<antcall target="test-contrib"/>
<available file="${test.build.dir}/testsfailed" property="testsfailed"/>
<fail if="testsfailed">Tests failed!</fail>
</target>
<!-- Run all unit tests, not just Test*, and use non-test configuration. -->
<target name="test-cluster" description="Run all unit tests, not just Test*, and use non-test configuration.">
@ -1393,6 +1410,7 @@
<target name="create-c++-utils-makefile" depends="check-c++-makefiles"
if="need.c++.utils.makefile">
<mkdir dir="${build.c++.utils}"/>
<chmod file="${c++.utils.src}/configure" perm="ugo+x"/>
<exec executable="${c++.utils.src}/configure" dir="${build.c++.utils}"
failonerror="yes">
<arg value="--prefix=${install.c++}"/>
@ -1410,6 +1428,7 @@
<target name="create-c++-pipes-makefile" depends="check-c++-makefiles"
if="need.c++.pipes.makefile">
<mkdir dir="${build.c++.pipes}"/>
<chmod file="${c++.pipes.src}/configure" perm="ugo+x"/>
<exec executable="${c++.pipes.src}/configure" dir="${build.c++.pipes}"
failonerror="yes">
<arg value="--prefix=${install.c++}"/>
@ -1432,6 +1451,7 @@
depends="check-c++-makefiles"
if="need.c++.examples.pipes.makefile">
<mkdir dir="${build.c++.examples.pipes}"/>
<chmod file="${c++.examples.pipes.src}/configure" perm="ugo+x"/>
<exec executable="${c++.examples.pipes.src}/configure"
dir="${build.c++.examples.pipes}"
failonerror="yes">

@ -8,21 +8,13 @@
<configuration>
<property>
<name>mapred.capacity-scheduler.queue.default.guaranteed-capacity</name>
<name>mapred.capacity-scheduler.queue.default.capacity</name>
<value>100</value>
<description>Percentage of the number of slots in the cluster that are
guaranteed to be available for jobs in this queue.
to be available for jobs in this queue.
</description>
</property>
<property>
<name>mapred.capacity-scheduler.queue.default.reclaim-time-limit</name>
<value>300</value>
<description>The amount of time, in seconds, before which
resources distributed to other queues will be reclaimed.
</description>
</property>
<property>
<name>mapred.capacity-scheduler.queue.default.supports-priority</name>
<value>false</value>
@ -54,28 +46,9 @@
</description>
</property>
<property>
<name>mapred.capacity-scheduler.reclaimCapacity.interval</name>
<value>5</value>
<description>The time interval, in seconds, between which the scheduler
periodically determines whether capacity needs to be reclaimed for
any queue.
</description>
</property>
<!-- The default configuration settings for the capacity task scheduler -->
<!-- The default values would be applied to all the queues which don't have -->
<!-- the appropriate property for the particular queue -->
<property>
<name>mapred.capacity-scheduler.default-reclaim-time-limit</name>
<value>300</value>
<description>The amount of time, in seconds, before which
resources distributed to other queues will be reclaimed by default
in a job queue.
</description>
</property>
<property>
<name>mapred.capacity-scheduler.default-supports-priority</name>
<value>false</value>
@ -83,37 +56,6 @@
account in scheduling decisions by default in a job queue.
</description>
</property>
<property>
<name>mapred.capacity-scheduler.task.default-pmem-percentage-in-vmem</name>
<value>-1</value>
<description>If mapred.task.maxpmem is set to -1, this configuration will
be used to calculate job's physical memory requirements as a percentage of
the job's virtual memory requirements set via mapred.task.maxvmem. This
property thus provides default value of physical memory for job's that
don't explicitly specify physical memory requirements.
If not explicitly set to a valid value, scheduler will not consider
physical memory for scheduling even if virtual memory based scheduling is
enabled(by setting valid values for both mapred.task.default.maxvmem and
mapred.task.limit.maxvmem).
</description>
</property>
<property>
<name>mapred.capacity-scheduler.task.limit.maxpmem</name>
<value>-1</value>
<description>Configuration that provides an upper limit on the maximum
physical memory that can be specified by a job. The job configuration
mapred.task.maxpmem should be less than this value. If not, the job will
be rejected by the scheduler.
If it is set to -1, scheduler will not consider physical memory for
scheduling even if virtual memory based scheduling is enabled(by setting
valid values for both mapred.task.default.maxvmem and
mapred.task.limit.maxvmem).
</description>
</property>
<property>
<name>mapred.capacity-scheduler.default-minimum-user-limit-percent</name>

Some files were not shown because too many files changed in this diff.