public class HadoopUtils extends Object
| Modifier and Type | Field and Description |
|---|---|
static Collection<String> |
FS_SCHEMES_NON_ATOMIC
A
Collection of all known FileSystem schemes that do not support atomic renames or copies. |
static String |
HDFS_ILLEGAL_TOKEN_REGEX |
static String |
MAX_FILESYSTEM_QPS |
| Constructor and Description |
|---|
HadoopUtils() |
| Modifier and Type | Method and Description |
|---|---|
static void |
addGobblinSite()
Add "gobblin-site.xml" as a
Configuration resource. |
static void |
copyFile(org.apache.hadoop.fs.FileSystem srcFs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.FileSystem dstFs,
org.apache.hadoop.fs.Path dst,
boolean overwrite,
org.apache.hadoop.conf.Configuration conf)
Copy a file from a srcFs
FileSystem to a dstFs FileSystem. |
static void |
copyFile(org.apache.hadoop.fs.FileSystem srcFs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.FileSystem dstFs,
org.apache.hadoop.fs.Path dst,
org.apache.hadoop.fs.Path tmp,
boolean overwriteDst,
org.apache.hadoop.conf.Configuration conf)
Copies a src
Path from a srcFs FileSystem to a dst Path on a dstFs FileSystem. |
static void |
copyPath(org.apache.hadoop.fs.FileSystem srcFs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.FileSystem dstFs,
org.apache.hadoop.fs.Path dst,
boolean overwrite,
org.apache.hadoop.conf.Configuration conf)
Copies data from a src
Path to a dst Path. |
static void |
copyPath(org.apache.hadoop.fs.FileSystem srcFs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.FileSystem dstFs,
org.apache.hadoop.fs.Path dst,
org.apache.hadoop.conf.Configuration conf)
Copies data from a src
Path to a dst Path. |
static void |
deleteDirectories(org.apache.hadoop.fs.FileSystem fs,
List<String> directoriesToDelete,
boolean recursive,
boolean moveToTrash)
Calls deletePath() on each directory in the given list of directories to delete.
|
static void |
deleteIfExists(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path,
boolean recursive)
A wrapper around
FileSystem.delete(Path, boolean) that only deletes a given Path if it is present
on the given FileSystem. |
static void |
deletePath(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path f,
boolean recursive)
A wrapper around
FileSystem.delete(Path, boolean) which throws IOException if the given
Path exists, and FileSystem.delete(Path, boolean) returns False. |
static void |
deletePathAndEmptyAncestors(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path f,
boolean recursive) |
static void |
deletePathByRegex(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path,
String regex)
Delete files according to the regular expression provided
|
static org.apache.hadoop.io.Writable |
deserializeFromString(Class<? extends org.apache.hadoop.io.Writable> writableClass,
String serializedWritableStr)
Deserialize a
Writable object from a string. |
static org.apache.hadoop.io.Writable |
deserializeFromString(Class<? extends org.apache.hadoop.io.Writable> writableClass,
String serializedWritableStr,
org.apache.hadoop.conf.Configuration configuration)
Deserialize a
Writable object from a string. |
static org.apache.hadoop.fs.permission.FsPermission |
deserializeFsPermission(State props,
String propName,
org.apache.hadoop.fs.permission.FsPermission defaultPermission)
Get
FsPermission from a State object. |
static org.apache.hadoop.fs.permission.FsPermission |
deserializeWriterDirPermissions(State state,
int numBranches,
int branchId)
Deserializes a
FsPermissions object that should be used when a DataWriter is creating directories. |
static org.apache.hadoop.fs.permission.FsPermission |
deserializeWriterFilePermissions(State state,
int numBranches,
int branchId)
Deserializes a
FsPermissions object that should be used when a DataWriter is writing a file. |
static org.apache.hadoop.conf.Configuration |
getConfFromProperties(Properties properties) |
static org.apache.hadoop.conf.Configuration |
getConfFromState(State state) |
static org.apache.hadoop.conf.Configuration |
getConfFromState(State state,
com.google.common.base.Optional<String> encryptedPath)
Provides Hadoop configuration given state.
|
static org.apache.hadoop.fs.FileSystem |
getOptionallyThrottledFileSystem(org.apache.hadoop.fs.FileSystem fs,
int qpsLimit)
Get a throttled
FileSystem that limits the number of queries per second to a FileSystem. |
static org.apache.hadoop.fs.FileSystem |
getOptionallyThrottledFileSystem(org.apache.hadoop.fs.FileSystem fs,
State state)
Calls
getOptionallyThrottledFileSystem(FileSystem, int) parsing the qps from the input State
at key MAX_FILESYSTEM_QPS. |
static org.apache.hadoop.fs.FileSystem |
getSourceFileSystem(State state)
Get a
FileSystem object for the uri specified at ConfigurationKeys.SOURCE_FILEBASED_FS_URI. |
static State |
getStateFromConf(org.apache.hadoop.conf.Configuration conf) |
static org.apache.hadoop.fs.FileSystem |
getWriterFileSystem(State state,
int numBranches,
int branchId)
Get a
FileSystem object for the uri specified at ConfigurationKeys.WRITER_FILE_SYSTEM_URI. |
static boolean |
hasContent(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path) |
static List<org.apache.hadoop.fs.FileStatus> |
listStatusRecursive(org.apache.hadoop.fs.FileSystem fileSystem,
org.apache.hadoop.fs.Path path)
Deprecated.
|
static void |
movePath(org.apache.hadoop.fs.FileSystem srcFs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.FileSystem dstFs,
org.apache.hadoop.fs.Path dst,
boolean overwrite,
org.apache.hadoop.conf.Configuration conf)
Moves a src
Path from a srcFs FileSystem to a dst Path on a dstFs FileSystem. |
static void |
movePath(org.apache.hadoop.fs.FileSystem srcFs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.FileSystem dstFs,
org.apache.hadoop.fs.Path dst,
org.apache.hadoop.conf.Configuration conf)
Moves a src
Path from a srcFs FileSystem to a dst Path on a dstFs FileSystem. |
static void |
moveToTrash(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path)
Moves the object to the filesystem trash according to the file system policy.
|
static org.apache.hadoop.conf.Configuration |
newConfiguration() |
static void |
renamePath(org.apache.hadoop.fs.FileContext fc,
org.apache.hadoop.fs.Path oldName,
org.apache.hadoop.fs.Path newName)
A wrapper around
FileContext.rename(Path, Path, Options.Rename...). |
static void |
renamePath(org.apache.hadoop.fs.FileContext fc,
org.apache.hadoop.fs.Path oldName,
org.apache.hadoop.fs.Path newName,
boolean overwrite)
A wrapper around
FileContext.rename(Path, Path, Options.Rename...). |
static void |
renamePath(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path oldName,
org.apache.hadoop.fs.Path newName)
A wrapper around
FileSystem.rename(Path, Path) which throws IOException if
FileSystem.rename(Path, Path) returns False. |
static void |
renamePath(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path oldName,
org.apache.hadoop.fs.Path newName,
boolean overwrite)
A wrapper around
FileSystem.rename(Path, Path) which throws IOException if
FileSystem.rename(Path, Path) returns False. |
static boolean |
renamePathHandleLocalFSRace(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.Path dst)
Renames a src
Path on fs FileSystem to a dst Path. |
static void |
renameRecursively(org.apache.hadoop.fs.FileSystem fileSystem,
org.apache.hadoop.fs.Path from,
org.apache.hadoop.fs.Path to)
This method is an additive implementation of the
FileSystem.rename(Path, Path) method. |
static boolean |
safeRenameIfNotExists(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path from,
org.apache.hadoop.fs.Path to)
Renames 'from' to 'to' if 'to' doesn't exist, in a thread-safe way.
|
static void |
safeRenameRecursively(org.apache.hadoop.fs.FileSystem fileSystem,
org.apache.hadoop.fs.Path from,
org.apache.hadoop.fs.Path to)
A thread safe variation of
renamePath(FileSystem, Path, Path) which can be used in
multi-threaded/multi-mapper environment. |
static org.apache.hadoop.fs.Path |
sanitizePath(org.apache.hadoop.fs.Path path,
String substitute)
Remove illegal HDFS path characters from the given path.
|
static String |
sanitizePath(String path,
String substitute)
Remove illegal HDFS path characters from the given path.
|
static String |
serializeToString(org.apache.hadoop.io.Writable writable)
Serialize a
Writable object into a string. |
static void |
serializeWriterDirPermissions(State state,
int numBranches,
int branchId,
org.apache.hadoop.fs.permission.FsPermission fsPermissions)
Given an
FsPermission object, set a key, value pair in the given State for the writer to
use when creating directories. |
static void |
serializeWriterFilePermissions(State state,
int numBranches,
int branchId,
org.apache.hadoop.fs.permission.FsPermission fsPermissions)
Given an
FsPermission object, set a key, value pair in the given State for the writer to
use when creating files. |
static void |
setGroup(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path,
String group)
Set the group associated with a given path.
|
static void |
setPermissions(org.apache.hadoop.fs.Path location,
com.google.common.base.Optional<String> owner,
com.google.common.base.Optional<String> group,
org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.permission.FsPermission permission)
Try to set owner and permissions for the path.
|
static void |
setWriterDirOctalPermissions(State state,
int numBranches,
int branchId,
String octalPermissions)
|
static void |
setWriterFileOctalPermissions(State state,
int numBranches,
int branchId,
String octalPermissions)
|
static String |
toUriPath(org.apache.hadoop.fs.Path path)
Get the path as a string without schema or authority.
|
static boolean |
unsafeRenameIfNotExists(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path from,
org.apache.hadoop.fs.Path to)
Renames 'from' to 'to' if 'to' doesn't exist, in a non-thread-safe way.
|
public static final String HDFS_ILLEGAL_TOKEN_REGEX
public static final Collection<String> FS_SCHEMES_NON_ATOMIC
Collection of all known FileSystem schemes that do not support atomic renames or copies.
The following important properties are useful to remember when writing code that is compatible with S3:
FileSystem.create(Path) will first go to the local filesystem, when the stream
is closed the local file will be uploaded to S3.
public static final String MAX_FILESYSTEM_QPS
public static org.apache.hadoop.conf.Configuration newConfiguration()
@Deprecated public static List<org.apache.hadoop.fs.FileStatus> listStatusRecursive(org.apache.hadoop.fs.FileSystem fileSystem, org.apache.hadoop.fs.Path path) throws IOException
FileListUtils.listFilesRecursively(FileSystem, Path).IOExceptionpublic static String toUriPath(org.apache.hadoop.fs.Path path)
public static void deletePath(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path f,
boolean recursive)
throws IOException
FileSystem.delete(Path, boolean) which throws IOException if the given
Path exists, and FileSystem.delete(Path, boolean) returns False.IOExceptionpublic static void deleteDirectories(org.apache.hadoop.fs.FileSystem fs,
List<String> directoriesToDelete,
boolean recursive,
boolean moveToTrash)
throws IOException
IOExceptionpublic static void deleteIfExists(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path,
boolean recursive)
throws IOException
FileSystem.delete(Path, boolean) that only deletes a given Path if it is present
on the given FileSystem.IOExceptionpublic static void deletePathAndEmptyAncestors(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path f,
boolean recursive)
throws IOException
IOExceptionpublic static void deletePathByRegex(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path,
String regex)
throws IOException
fs - Filesystem objectpath - base pathregex - regular expression to select files to deleteIOExceptionpublic static void moveToTrash(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path)
throws IOException
fs - FileSystem objectpath - Path to the object to be moved to trash.IOExceptionpublic static boolean renamePathHandleLocalFSRace(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.Path dst)
throws IOException
Path on fs FileSystem to a dst Path. If fs is a LocalFileSystem and
src is a directory then File.renameTo(java.io.File) is called directly to avoid a directory rename race condition where
RawLocalFileSystem.rename(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path) copies the conflicting src directory into dst resulting in
an extra nested level, such as /root/a/b/c/e/e where e is repeated.fs - the FileSystem where the src Path existssrc - the source Path which will be renameddst - the Path to rename toIOException - if rename failed for reasons other than target exists.public static void renamePath(org.apache.hadoop.fs.FileContext fc,
org.apache.hadoop.fs.Path oldName,
org.apache.hadoop.fs.Path newName)
throws IOException
FileContext.rename(Path, Path, Options.Rename...).IOExceptionpublic static void renamePath(org.apache.hadoop.fs.FileContext fc,
org.apache.hadoop.fs.Path oldName,
org.apache.hadoop.fs.Path newName,
boolean overwrite)
throws IOException
FileContext.rename(Path, Path, Options.Rename...).IOExceptionpublic static void renamePath(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path oldName,
org.apache.hadoop.fs.Path newName)
throws IOException
FileSystem.rename(Path, Path) which throws IOException if
FileSystem.rename(Path, Path) returns False.IOExceptionpublic static void renamePath(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path oldName,
org.apache.hadoop.fs.Path newName,
boolean overwrite)
throws IOException
FileSystem.rename(Path, Path) which throws IOException if
FileSystem.rename(Path, Path) returns False.IOExceptionpublic static void movePath(org.apache.hadoop.fs.FileSystem srcFs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.FileSystem dstFs,
org.apache.hadoop.fs.Path dst,
org.apache.hadoop.conf.Configuration conf)
throws IOException
Path from a srcFs FileSystem to a dst Path on a dstFs FileSystem. If
the srcFs and the dstFs have the same scheme, and neither of them are S3 schemes, then the Path is simply
renamed. Otherwise, the data is copied from the src Path to the dst Path. So this method can handle copying
data between different FileSystem implementations.srcFs - the source FileSystem where the src Path existssrc - the source Path which will be moveddstFs - the destination FileSystem where the dst Path should be createddst - the Path to move data toIOExceptionpublic static void movePath(org.apache.hadoop.fs.FileSystem srcFs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.FileSystem dstFs,
org.apache.hadoop.fs.Path dst,
boolean overwrite,
org.apache.hadoop.conf.Configuration conf)
throws IOException
Path from a srcFs FileSystem to a dst Path on a dstFs FileSystem. If
the srcFs and the dstFs have the same scheme, and neither of them are S3 schemes, then the Path is simply
renamed. Otherwise, the data is copied from the src Path to the dst Path. So this method can handle copying
data between different FileSystem implementations.srcFs - the source FileSystem where the src Path existssrc - the source Path which will be moveddstFs - the destination FileSystem where the dst Path should be createddst - the Path to move data tooverwrite - true if the destination should be overwritten; otherwise, falseIOExceptionpublic static void copyPath(org.apache.hadoop.fs.FileSystem srcFs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.FileSystem dstFs,
org.apache.hadoop.fs.Path dst,
org.apache.hadoop.conf.Configuration conf)
throws IOException
Path to a dst Path.
This method should be used in preference to
FileUtil.copy(FileSystem, Path, FileSystem, Path, boolean, boolean, Configuration), which does not handle
clean up of incomplete files if there is an error while copying data.
TODO this method does not handle cleaning up any local files leftover by writing to S3.
srcFs - the source FileSystem where the src Path existssrc - the Path to copy from the source FileSystemdstFs - the destination FileSystem where the dst Path should be createddst - the Path to copy data toIOExceptionpublic static void copyPath(org.apache.hadoop.fs.FileSystem srcFs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.FileSystem dstFs,
org.apache.hadoop.fs.Path dst,
boolean overwrite,
org.apache.hadoop.conf.Configuration conf)
throws IOException
Path to a dst Path.
This method should be used in preference to
FileUtil.copy(FileSystem, Path, FileSystem, Path, boolean, boolean, Configuration), which does not handle
clean up of incomplete files if there is an error while copying data.
TODO this method does not handle cleaning up any local files leftover by writing to S3.
srcFs - the source FileSystem where the src Path existssrc - the Path to copy from the source FileSystemdstFs - the destination FileSystem where the dst Path should be createddst - the Path to copy data tooverwrite - true if the destination should be overwritten; otherwise, falseIOExceptionpublic static void copyFile(org.apache.hadoop.fs.FileSystem srcFs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.FileSystem dstFs,
org.apache.hadoop.fs.Path dst,
org.apache.hadoop.fs.Path tmp,
boolean overwriteDst,
org.apache.hadoop.conf.Configuration conf)
throws IOException
Path from a srcFs FileSystem to a dst Path on a dstFs FileSystem. If
either the srcFs or dstFs are S3 FileSystems (as dictated by FS_SCHEMES_NON_ATOMIC) then data is directly
copied from the src to the dst. Otherwise data is first copied to a tmp Path, which is then renamed to the
dst.srcFs - the source FileSystem where the src Path existssrc - the Path to copy from the source FileSystemdstFs - the destination FileSystem where the dst Path should be createddst - the Path to copy data totmp - the temporary Path to use when copying dataoverwriteDst - true if the destination and tmp path should be overwritten, false otherwiseIOExceptionpublic static void copyFile(org.apache.hadoop.fs.FileSystem srcFs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.FileSystem dstFs,
org.apache.hadoop.fs.Path dst,
boolean overwrite,
org.apache.hadoop.conf.Configuration conf)
throws IOException
FileSystem to a dstFs FileSystem. The src Path must be a file,
that is FileSystem.isFile(Path) must return true for src.
If overwrite is specified to true, this method may delete the dst directory even if the copy from src to dst fails.
srcFs - the src FileSystem to copy the file fromsrc - the src Path to copydstFs - the destination FileSystem to write todst - the destination Path to write tooverwrite - true if the dst Path should be overwritten, false otherwiseIOExceptionpublic static void renameRecursively(org.apache.hadoop.fs.FileSystem fileSystem,
org.apache.hadoop.fs.Path from,
org.apache.hadoop.fs.Path to)
throws IOException
FileSystem.rename(Path, Path) method. It moves all the
files/directories under 'from' path to the 'to' path without overwriting existing directories in the 'to' path.
The rename operation happens at the first non-existent sub-directory. If a directory at destination path already exists, it recursively tries to move sub-directories. If all the sub-directories also exist at the destination, a file level move is done
fileSystem - on which the data needs to be movedfrom - path of the data to be movedto - path of the data to be movedIOExceptionpublic static org.apache.hadoop.fs.FileSystem getOptionallyThrottledFileSystem(org.apache.hadoop.fs.FileSystem fs,
State state)
throws IOException
getOptionallyThrottledFileSystem(FileSystem, int) parsing the qps from the input State
at key MAX_FILESYSTEM_QPS.IOExceptionpublic static org.apache.hadoop.fs.FileSystem getOptionallyThrottledFileSystem(org.apache.hadoop.fs.FileSystem fs,
int qpsLimit)
throws IOException
FileSystem that limits the number of queries per second to a FileSystem. If
the input qps is <= 0, no such throttling will be performed.IOExceptionpublic static boolean safeRenameIfNotExists(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path from,
org.apache.hadoop.fs.Path to)
throws IOException
FileSystem.rename(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path) is inconsistent across file system implementations, e.g. in some of them rename(foo, bar)
will create bar/foo if bar already existed, but it will only create bar if it didn't.
The thread-safety is only guaranteed among calls to this method. An external modification to the relevant target directory could still cause unexpected results in the renaming.
fs - filesystem where rename will be executed.from - origin Path.to - target Path.IOException - if rename failed for reasons other than target exists.public static boolean unsafeRenameIfNotExists(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path from,
org.apache.hadoop.fs.Path to)
throws IOException
fs - filesystem where rename will be executed.from - origin Path.to - target Path.IOException - if rename failed for reasons other than target exists.public static void safeRenameRecursively(org.apache.hadoop.fs.FileSystem fileSystem,
org.apache.hadoop.fs.Path from,
org.apache.hadoop.fs.Path to)
throws IOException
renamePath(FileSystem, Path, Path) which can be used in
multi-threaded/multi-mapper environment. The rename operation always happens at file level hence directories are
not overwritten under the 'to' path.
If the contents of destination 'to' path is not expected to be modified concurrently, use
renamePath(FileSystem, Path, Path), which is faster and more optimized for most
FileSystem implementations. Use
renameRecursively(FileSystem, Path, Path)fileSystem - on which the data needs to be movedfrom - path of the data to be movedto - path of the data to be movedIOExceptionpublic static org.apache.hadoop.conf.Configuration getConfFromState(State state)
public static org.apache.hadoop.conf.Configuration getConfFromState(State state, com.google.common.base.Optional<String> encryptedPath)
state - source config.encryptedPath - Optional. If provided, config that is on this path will be decrypted. @see ConfigUtils.resolveEncrypted
Note that config on encryptedPath will be included in the end result even it's not part of includeOnlyPathpublic static org.apache.hadoop.conf.Configuration getConfFromProperties(Properties properties)
public static State getStateFromConf(org.apache.hadoop.conf.Configuration conf)
public static void setGroup(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path,
String group)
throws IOException
fs - the FileSystem instance used to perform the file operationpath - the given pathgroup - the group associated with the pathIOExceptionpublic static String serializeToString(org.apache.hadoop.io.Writable writable) throws IOException
Writable object into a string.writable - the Writable object to be serializedWritable objectIOException - if there's something wrong with the serializationpublic static org.apache.hadoop.io.Writable deserializeFromString(Class<? extends org.apache.hadoop.io.Writable> writableClass, String serializedWritableStr) throws IOException
Writable object from a string.writableClass - the Writable implementation classserializedWritableStr - the string containing a serialized Writable objectWritable deserialized from the stringIOException - if there's something wrong with the deserializationpublic static org.apache.hadoop.io.Writable deserializeFromString(Class<? extends org.apache.hadoop.io.Writable> writableClass, String serializedWritableStr, org.apache.hadoop.conf.Configuration configuration) throws IOException
Writable object from a string.writableClass - the Writable implementation classserializedWritableStr - the string containing a serialized Writable objectconfiguration - a Configuration object containing Hadoop configuration propertiesWritable deserialized from the stringIOException - if there's something wrong with the deserializationpublic static void serializeWriterFilePermissions(State state, int numBranches, int branchId, org.apache.hadoop.fs.permission.FsPermission fsPermissions)
FsPermission object, set a key, value pair in the given State for the writer to
use when creating files. This method should be used in conjunction with deserializeWriterFilePermissions(State, int, int).public static void serializeWriterDirPermissions(State state, int numBranches, int branchId, org.apache.hadoop.fs.permission.FsPermission fsPermissions)
FsPermission object, set a key, value pair in the given State for the writer to
use when creating directories. This method should be used in conjunction with deserializeWriterDirPermissions(State, int, int).public static void setWriterFileOctalPermissions(State state, int numBranches, int branchId, String octalPermissions)
String in octal notation, set a key, value pair in the given State for the writer to
use when creating files. This method should be used in conjunction with deserializeWriterFilePermissions(State, int, int).public static void setWriterDirOctalPermissions(State state, int numBranches, int branchId, String octalPermissions)
String in octal notation, set a key, value pair in the given State for the writer to
use when creating directories. This method should be used in conjunction with deserializeWriterDirPermissions(State, int, int).public static org.apache.hadoop.fs.permission.FsPermission deserializeWriterFilePermissions(State state, int numBranches, int branchId)
FsPermissions object that should be used when a DataWriter is writing a file.public static org.apache.hadoop.fs.permission.FsPermission deserializeWriterDirPermissions(State state, int numBranches, int branchId)
FsPermissions object that should be used when a DataWriter is creating directories.public static org.apache.hadoop.fs.permission.FsPermission deserializeFsPermission(State props, String propName, org.apache.hadoop.fs.permission.FsPermission defaultPermission)
FsPermission from a State object.props - A State containing properties.propName - The property name for the permission. If not contained in the given state,
defaultPermission will be used.defaultPermission - default permission if propName is not contained in props.FsPermission object.public static String sanitizePath(String path, String substitute)
public static org.apache.hadoop.fs.Path sanitizePath(org.apache.hadoop.fs.Path path,
String substitute)
public static void setPermissions(org.apache.hadoop.fs.Path location,
com.google.common.base.Optional<String> owner,
com.google.common.base.Optional<String> group,
org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.permission.FsPermission permission)
public static boolean hasContent(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path)
throws IOException
IOExceptionpublic static void addGobblinSite()
Configuration resource.public static org.apache.hadoop.fs.FileSystem getSourceFileSystem(State state) throws IOException
FileSystem object for the uri specified at ConfigurationKeys.SOURCE_FILEBASED_FS_URI.IOExceptionpublic static org.apache.hadoop.fs.FileSystem getWriterFileSystem(State state, int numBranches, int branchId) throws IOException
FileSystem object for the uri specified at ConfigurationKeys.WRITER_FILE_SYSTEM_URI.IOException