Review Board 1.7.22


HIVE-3756 (LOAD DATA does not honor permission inheritance)

Review Request #12050 - Created June 22, 2013 and updated

Chaoyu Tang
trunk
HIVE-, HIVE-3756
Reviewers
hive
ashutoshc, sushanth
hive-git
Problems:
1. When doing load data or insert overwrite to a table, the data files under database/table directory could not inherit their parent's permissions (i.e. group) as described in HIVE-3756.
2. Beside the group issue, the read/write permission mode is also not inherited
3. Same problem affects the partition files (see HIVE-3094)

Cause:
The task results (from load data or insert overwrite) are initially stored in scratchdir and then loaded under warehouse table directory. FileSystem.rename is used in this step (e.g. LoadTable/LoadPartition) to move the dirs/files but it preserves their permissions (including group and mode) which are determined by scratchdir permission or umask. If the scratchdir has different permissions from those of warehouse table directories, the problem occurs.

Solution:
After the FileSystem.rename is called, changing all renamed (moved) files/dirs to their destination parents' permissions if needed (say if hive.warehouse.subdir.inherit.perms is true). Here I introduced a new method renameFile doing both rename and permission. It replaces the FileSystem.rename used in LoadTable/LoadPartition. I do not replace rename used to move files/dirs under same scratchdir in the middle of task processing. It looks to me not necessary since they are temp files and also probably access protected by top scratchdir mode 700 (HIVE-4487).
The following cases tested that all created subdirs/files inherit their parents' permission mode and group in : 1). create database; 2). create table; 3). load data; 4) insert overwrite; 5) partitions.
{code}
hive> dfs -ls -d /user/tester1/hive;                                                                       
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:20 /user/tester1/hive

hive> create database tester1 COMMENT 'Database for user tester1' LOCATION '/user/tester1/hive/tester1.db';
hive> dfs -ls -R /user/tester1/hive;                                                                        
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:21 /user/tester1/hive/tester1.db

hive> use tester1;
hive>  create table tester1.tst1(col1 int, col2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE;
hive> dfs -ls -R /user/tester1/hive;                                                                                    
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:22 /user/tester1/hive/tester1.db
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:22 /user/tester1/hive/tester1.db/tst1

hive>  load data local inpath '/home/tester1/tst1.input' into table tst1;                                               
hive> dfs -ls -R /user/tester1/hive;                                     
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:22 /user/tester1/hive/tester1.db
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1
-rw-rw----   3 tester1 testgroup123        168 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1/tst1.input

hive> create table tester1.tst2(col1 int, col2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS SEQUENCEFILE;
hive> dfs -ls -R /user/tester1/hive;                                                                                    
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:24 /user/tester1/hive/tester1.db
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1
-rw-rw----   3 tester1 testgroup123        168 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1/tst1.input
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:24 /user/tester1/hive/tester1.db/tst2

hive> insert overwrite table tst2 select * from tst1;
hive> dfs -ls -R /user/tester1/hive;                 
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:25 /user/tester1/hive/tester1.db
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1
-rw-rw----   3 tester1 testgroup123        168 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1/tst1.input
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:25 /user/tester1/hive/tester1.db/tst2
-rw-rw----   3 tester1 testgroup123        291 2013-06-22 13:25 /user/tester1/hive/tester1.db/tst2/000000_0

hive> create table tester1.tst3(col2 string) PARTITIONED BY (col1 int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE;
hive> dfs -ls -R /user/tester1/hive;                                                                                    
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:27 /user/tester1/hive/tester1.db
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1
-rw-rw----   3 tester1 testgroup123        168 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1/tst1.input
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:25 /user/tester1/hive/tester1.db/tst2
-rw-rw----   3 tester1 testgroup123        291 2013-06-22 13:25 /user/tester1/hive/tester1.db/tst2/000000_0
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:27 /user/tester1/hive/tester1.db/tst3

hive> set hive.exec.dynamic.partition.mode=nonstrict;                                                                   
hive> insert overwrite table tester1.tst3 partition (col1) select t1.col2, t1.col1 from tst1 t1;
hive> dfs -ls -R /user/tester1/hive;                                                            
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:27 /user/tester1/hive/tester1.db
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1
-rw-rw----   3 tester1 testgroup123        168 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1/tst1.input
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:25 /user/tester1/hive/tester1.db/tst2
-rw-rw----   3 tester1 testgroup123        291 2013-06-22 13:25 /user/tester1/hive/tester1.db/tst2/000000_0
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:28 /user/tester1/hive/tester1.db/tst3
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:28 /user/tester1/hive/tester1.db/tst3/col1=1111
-rw-rw----   3 tester1 testgroup123         51 2013-06-22 13:28 /user/tester1/hive/tester1.db/tst3/col1=1111/000000_0
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:28 /user/tester1/hive/tester1.db/tst3/col1=2222
-rw-rw----   3 tester1 testgroup123         51 2013-06-22 13:28 /user/tester1/hive/tester1.db/tst3/col1=2222/000000_0
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:28 /user/tester1/hive/tester1.db/tst3/col1=3333
-rw-rw----   3 tester1 testgroup123         51 2013-06-22 13:28 /user/tester1/hive/tester1.db/tst3/col1=3333/000000_0
{code}

Diff revision 3 (Latest)

1 2 3
1 2 3

  1. ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java: Loading...
ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
Revision 87a584d New Change
[20] 2111 lines
[+20] [+] private static boolean destExists(List<List<Path[]>> result, Path proposed) {
2112
      }
2112
      }
2113
    }
2113
    }
2114
    return false;
2114
    return false;
2115
  }
2115
  }
2116

    
   
2116

   

    
   
2117
  //it is assumed that parent directory of the destf should already exist when this

    
   
2118
  //method is called. when the replace value is true, this method works a little different

    
   
2119
  //from mv command if the destf is a directory, it replaces the destf instead of moving under

    
   
2120
  //the destf. in this case, the replaced destf still preserves the original destf's permission

    
   
2121
  static protected boolean renameFile(HiveConf conf, Path srcf, Path destf, FileSystem fs,

    
   
2122
      boolean replace) throws HiveException {

    
   
2123
    boolean success = false;

    
   
2124
    boolean inheritPerms = HiveConf.getBoolVar(conf,

    
   
2125
        HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);

    
   
2126
    String group = null;

    
   
2127
    String permission = null;

    
   
2128

   

    
   
2129
    try {

    
   
2130
      if (inheritPerms || replace) {

    
   
2131
        try{

    
   
2132
          FileStatus deststatus = fs.getFileStatus(destf);

    
   
2133
          if (inheritPerms) {

    
   
2134
            group = deststatus.getGroup();

    
   
2135
            permission= Integer.toString(deststatus.getPermission().toShort(), 8);

    
   
2136
          }

    
   
2137
          //if destf is an existing directory:

    
   
2138
          //if replace is true, delete followed by rename(mv) is equivalent to replace

    
   
2139
          //if replace is false, rename (mv) actually move the src under dest dir

    
   
2140
          //if destf is an existing file, rename is actually a replace, and do not need

    
   
2141
          // to delete the file first

    
   
2142
          if (replace && deststatus.isDir()) {

    
   
2143
            fs.delete(destf, true);

    
   
2144
          }

    
   
2145
        } catch (FileNotFoundException ignore) {

    
   
2146
          //if dest dir does not exist, any re

    
   
2147
          if (inheritPerms) {

    
   
2148
            FileStatus deststatus = fs.getFileStatus(destf.getParent());

    
   
2149
            group = deststatus.getGroup();

    
   
2150
            permission= Integer.toString(deststatus.getPermission().toShort(), 8);

    
   
2151
          }

    
   
2152
        }

    
   
2153
      }

    
   
2154
      success = fs.rename(srcf, destf);

    
   
2155
      LOG.debug((replace ? "Replacing src:" : "Renaming src:") + srcf.toString()

    
   
2156
          + ";dest: " + destf.toString()  + ";Status:" + success);

    
   
2157
    } catch (IOException ioe) {

    
   
2158
      throw new HiveException("Unable to move source" + srcf + " to destination " + destf, ioe);

    
   
2159
    }

    
   
2160

   

    
   
2161
    if (success && inheritPerms) {

    
   
2162
      //use FsShell to change group and permissions recursively

    
   
2163
      try {

    
   
2164
        FsShell fshell = new FsShell();

    
   
2165
        fshell.setConf(conf);

    
   
2166
        fshell.run(new String[]{"-chgrp", "-R", group, destf.toString()});

    
   
2167
        fshell.run(new String[]{"-chmod", "-R", permission, destf.toString()});

    
   
2168
      } catch (Exception e) {

    
   
2169
        throw new HiveException("Unable to set permissions of " + destf, e);

    
   
2170
      }

    
   
2171
    }

    
   
2172
    return success;

    
   
2173
  }

    
   
2174

   
2117
  static protected void copyFiles(HiveConf conf, Path srcf, Path destf, FileSystem fs)
2175
  static protected void copyFiles(HiveConf conf, Path srcf, Path destf, FileSystem fs)
2118
      throws HiveException {
2176
      throws HiveException {

    
   
2177
    boolean inheritPerms = HiveConf.getBoolVar(conf,

    
   
2178
        HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
2119
    try {
2179
    try {
2120
      // create the destination if it does not exist
2180
      // create the destination if it does not exist
2121
      if (!fs.exists(destf)) {
2181
      if (!fs.exists(destf)) {
2122
        fs.mkdirs(destf);
2182
        fs.mkdirs(destf);

    
   
2183
        if (inheritPerms) {

    
   
2184
          fs.setPermission(destf, fs.getFileStatus(destf.getParent()).getPermission());

    
   
2185
        }
2123
      }
2186
      }
2124
    } catch (IOException e) {
2187
    } catch (IOException e) {
2125
      throw new HiveException(
2188
      throw new HiveException(
2126
          "copyFiles: error while checking/creating destination directory!!!",
2189
          "copyFiles: error while checking/creating destination directory!!!",
2127
          e);
2190
          e);
[+20] [20] 16 lines
[+20] static protected void copyFiles(HiveConf conf, Path srcf, Path destf, FileSystem fs)
2144

    
   
2207

   
2145
    // move it, move it
2208
    // move it, move it
2146
    try {
2209
    try {
2147
      for (List<Path[]> sdpairs : result) {
2210
      for (List<Path[]> sdpairs : result) {
2148
        for (Path[] sdpair : sdpairs) {
2211
        for (Path[] sdpair : sdpairs) {
2149
          if (!fs.rename(sdpair[0], sdpair[1])) {
2212
          if (!renameFile(conf, sdpair[0], sdpair[1], fs, false)) {
2150
            throw new IOException("Cannot move " + sdpair[0] + " to " + sdpair[1]);
2213
            throw new IOException("Cannot move " + sdpair[0] + " to " + sdpair[1]);
2151
          }
2214
          }
2152
        }
2215
        }
2153
      }
2216
      }
2154
    } catch (IOException e) {
2217
    } catch (IOException e) {
[+20] [20] 18 lines
[+20] static protected void copyFiles(HiveConf conf, Path srcf, Path destf, FileSystem fs)
2173
   */
2236
   */
2174
  static protected void replaceFiles(Path srcf, Path destf, Path oldPath, HiveConf conf)
2237
  static protected void replaceFiles(Path srcf, Path destf, Path oldPath, HiveConf conf)
2175
      throws HiveException {
2238
      throws HiveException {
2176
    try {
2239
    try {
2177
      FileSystem fs = srcf.getFileSystem(conf);
2240
      FileSystem fs = srcf.getFileSystem(conf);

    
   
2241
      boolean inheritPerms = HiveConf.getBoolVar(conf,

    
   
2242
          HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
2178

    
   
2243

   
2179
      // check if srcf contains nested sub-directories
2244
      // check if srcf contains nested sub-directories
2180
      FileStatus[] srcs;
2245
      FileStatus[] srcs;
2181
      try {
2246
      try {
2182
        srcs = fs.globStatus(srcf);
2247
        srcs = fs.globStatus(srcf);
[+20] [20] 4 lines
[+20] static protected void replaceFiles(Path srcf, Path destf, Path oldPath, HiveConf conf)
2187
        LOG.info("No sources specified to move: " + srcf);
2252
        LOG.info("No sources specified to move: " + srcf);
2188
        return;
2253
        return;
2189
      }
2254
      }
2190
      List<List<Path[]>> result = checkPaths(conf, fs, srcs, destf, true);
2255
      List<List<Path[]>> result = checkPaths(conf, fs, srcs, destf, true);
2191

    
   
2256

   
2192
      // point of no return -- delete oldPath
2257
      // point of no return -- delete oldPath only if it is not same as destf,
2193
      if (oldPath != null) {
2258
      // otherwise, the oldPath/destf will be cleaned later just before move

    
   
2259
      if (oldPath != null && (!destf.getFileSystem(conf).equals(oldPath.getFileSystem(conf))

    
   
2260
          || !destf.equals(oldPath))) {
2194
        try {
2261
        try {
2195
          FileSystem fs2 = oldPath.getFileSystem(conf);
2262
          FileSystem fs2 = oldPath.getFileSystem(conf);
2196
          if (fs2.exists(oldPath)) {
2263
          if (fs2.exists(oldPath)) {
2197
            // use FsShell to move data to .Trash first rather than delete permanently
2264
            // use FsShell to move data to .Trash first rather than delete permanently
2198
            FsShell fshell = new FsShell();
2265
            FsShell fshell = new FsShell();
[+20] [20] 7 lines
[+20] static protected void replaceFiles(Path srcf, Path destf, Path oldPath, HiveConf conf)
2206
      }
2273
      }
2207

    
   
2274

   
2208
      // rename src directory to destf
2275
      // rename src directory to destf
2209
      if (srcs.length == 1 && srcs[0].isDir()) {
2276
      if (srcs.length == 1 && srcs[0].isDir()) {
2210
        // rename can fail if the parent doesn't exist
2277
        // rename can fail if the parent doesn't exist
2211
        if (!fs.exists(destf.getParent())) {
2278
        Path destfp = destf.getParent();
2212
          fs.mkdirs(destf.getParent());
2279
        if (!fs.exists(destfp)) {

    
   
2280
          boolean success = fs.mkdirs(destfp);

    
   
2281
          if (inheritPerms && success) {

    
   
2282
            fs.setPermission(destfp, fs.getFileStatus(destfp.getParent()).getPermission());
2213
        }
2283
          }
2214
        if (fs.exists(destf)) {

   
2215
          fs.delete(destf, true);

   
2216
        }
2284
        }
2217

    
   
2285

   
2218
        boolean b = fs.rename(srcs[0].getPath(), destf);
2286
        boolean b = renameFile(conf, srcs[0].getPath(), destf, fs, true);
2219
        if (!b) {
2287
        if (!b) {
2220
          throw new HiveException("Unable to move results from " + srcs[0].getPath()
2288
          throw new HiveException("Unable to move results from " + srcs[0].getPath()
2221
              + " to destination directory: " + destf);
2289
              + " to destination directory: " + destf);
2222
        }
2290
        }
2223
        LOG.debug("Renaming:" + srcf.toString() + " to " + destf.toString()  + ",Status:" + b);

   
2224
      } else { // srcf is a file or pattern containing wildcards
2291
      } else { // srcf is a file or pattern containing wildcards
2225
        if (!fs.exists(destf)) {
2292
        if (!fs.exists(destf)) {
2226
          fs.mkdirs(destf);
2293
          boolean success = fs.mkdirs(destf);

    
   
2294
          if (inheritPerms && success) {

    
   
2295
            fs.setPermission(destf, fs.getFileStatus(destf.getParent()).getPermission());

    
   
2296
          }
2227
        }
2297
        }
2228
        // srcs must be a list of files -- ensured by LoadSemanticAnalyzer
2298
        // srcs must be a list of files -- ensured by LoadSemanticAnalyzer
2229
        for (List<Path[]> sdpairs : result) {
2299
        for (List<Path[]> sdpairs : result) {
2230
          for (Path[] sdpair : sdpairs) {
2300
          for (Path[] sdpair : sdpairs) {
2231
            if (!fs.rename(sdpair[0], sdpair[1])) {
2301
            if (!renameFile(conf, sdpair[0], sdpair[1], fs, true)) {
2232
              throw new IOException("Error moving: " + sdpair[0] + " into: " + sdpair[1]);
2302
              throw new IOException("Error moving: " + sdpair[0] + " into: " + sdpair[1]);
2233
            }
2303
            }
2234
          }
2304
          }
2235
        }
2305
        }
2236
      }
2306
      }
[+20] [20] 206 lines
  1. ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java: Loading...