Review Board 1.7.22


HIVE-3756 (LOAD DATA does not honor permission inheritance)

Review Request #12050 - Created June 22, 2013 and updated

Chaoyu Tang
trunk
HIVE-, HIVE-3756
Reviewers
hive
ashutoshc, sushanth
hive-git
Problems:
1. When doing load data or insert overwrite to a table, the data files under the database/table directory do not inherit their parent directory's permissions (i.e. the group), as described in HIVE-3756.
2. Besides the group issue, the read/write permission mode is also not inherited.
3. Same problem affects the partition files (see HIVE-3094)

Cause:
The task results (from load data or insert overwrite) are initially stored in the scratchdir and then moved under the warehouse table directory. FileSystem.rename is used in this step (e.g. LoadTable/LoadPartition) to move the dirs/files, but it preserves their permissions (including group and mode), which are determined by the scratchdir permissions or the umask. If the scratchdir has different permissions from those of the warehouse table directories, the problem occurs.

Solution:
After FileSystem.rename is called, change all renamed (moved) files/dirs to their destination parent's permissions if needed (i.e. if hive.warehouse.subdir.inherit.perms is true). I introduced a new method, renameFile, which does both the rename and the permission change. It replaces the FileSystem.rename calls used in LoadTable/LoadPartition. I did not replace the rename calls used to move files/dirs within the same scratchdir in the middle of task processing. That does not seem necessary to me, since they are temp files and are also probably access-protected by the top-level scratchdir mode 700 (HIVE-4487).
The following cases were tested to verify that all created subdirs/files inherit their parent's permission mode and group: 1) create database; 2) create table; 3) load data; 4) insert overwrite; 5) partitions.
{code}
hive> dfs -ls -d /user/tester1/hive;                                                                       
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:20 /user/tester1/hive

hive> create database tester1 COMMENT 'Database for user tester1' LOCATION '/user/tester1/hive/tester1.db';
hive> dfs -ls -R /user/tester1/hive;                                                                        
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:21 /user/tester1/hive/tester1.db

hive> use tester1;
hive>  create table tester1.tst1(col1 int, col2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE;
hive> dfs -ls -R /user/tester1/hive;                                                                                    
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:22 /user/tester1/hive/tester1.db
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:22 /user/tester1/hive/tester1.db/tst1

hive>  load data local inpath '/home/tester1/tst1.input' into table tst1;                                               
hive> dfs -ls -R /user/tester1/hive;                                     
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:22 /user/tester1/hive/tester1.db
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1
-rw-rw----   3 tester1 testgroup123        168 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1/tst1.input

hive> create table tester1.tst2(col1 int, col2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS SEQUENCEFILE;
hive> dfs -ls -R /user/tester1/hive;                                                                                    
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:24 /user/tester1/hive/tester1.db
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1
-rw-rw----   3 tester1 testgroup123        168 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1/tst1.input
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:24 /user/tester1/hive/tester1.db/tst2

hive> insert overwrite table tst2 select * from tst1;
hive> dfs -ls -R /user/tester1/hive;                 
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:25 /user/tester1/hive/tester1.db
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1
-rw-rw----   3 tester1 testgroup123        168 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1/tst1.input
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:25 /user/tester1/hive/tester1.db/tst2
-rw-rw----   3 tester1 testgroup123        291 2013-06-22 13:25 /user/tester1/hive/tester1.db/tst2/000000_0

hive> create table tester1.tst3(col2 string) PARTITIONED BY (col1 int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE;
hive> dfs -ls -R /user/tester1/hive;                                                                                    
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:27 /user/tester1/hive/tester1.db
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1
-rw-rw----   3 tester1 testgroup123        168 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1/tst1.input
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:25 /user/tester1/hive/tester1.db/tst2
-rw-rw----   3 tester1 testgroup123        291 2013-06-22 13:25 /user/tester1/hive/tester1.db/tst2/000000_0
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:27 /user/tester1/hive/tester1.db/tst3

hive> set hive.exec.dynamic.partition.mode=nonstrict;                                                                   
hive> insert overwrite table tester1.tst3 partition (col1) select t1.col2, t1.col1 from tst1 t1;
hive> dfs -ls -R /user/tester1/hive;                                                            
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:27 /user/tester1/hive/tester1.db
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1
-rw-rw----   3 tester1 testgroup123        168 2013-06-22 13:23 /user/tester1/hive/tester1.db/tst1/tst1.input
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:25 /user/tester1/hive/tester1.db/tst2
-rw-rw----   3 tester1 testgroup123        291 2013-06-22 13:25 /user/tester1/hive/tester1.db/tst2/000000_0
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:28 /user/tester1/hive/tester1.db/tst3
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:28 /user/tester1/hive/tester1.db/tst3/col1=1111
-rw-rw----   3 tester1 testgroup123         51 2013-06-22 13:28 /user/tester1/hive/tester1.db/tst3/col1=1111/000000_0
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:28 /user/tester1/hive/tester1.db/tst3/col1=2222
-rw-rw----   3 tester1 testgroup123         51 2013-06-22 13:28 /user/tester1/hive/tester1.db/tst3/col1=2222/000000_0
drwxrwx---   - tester1 testgroup123          0 2013-06-22 13:28 /user/tester1/hive/tester1.db/tst3/col1=3333
-rw-rw----   3 tester1 testgroup123         51 2013-06-22 13:28 /user/tester1/hive/tester1.db/tst3/col1=3333/000000_0
{code}

Diff revision 2

This is not the most recent revision of the diff. The latest diff is revision 3. See what's changed.

1 2 3
1 2 3

  1. ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java: Loading...
ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
Revision 17daaa1 New Change
[20] 2110 lines
[+20] [+] private static boolean destExists(List<List<Path[]>> result, Path proposed) {
2111
      }
2111
      }
2112
    }
2112
    }
2113
    return false;
2113
    return false;
2114
  }
2114
  }
2115

    
   
2115

   

    
   
2116
  //it is assumed that parent directory of the destf should already exist when this

    
   
2117
  //method is called, just like command mv

    
   
2118
  static protected boolean renameFile(HiveConf conf, Path srcf, Path destf, FileSystem fs)

    
   
2119
      throws HiveException {

    
   
2120
    boolean success = false;

    
   
2121
    boolean inheritPerms = HiveConf.getBoolVar(conf,

    
   
2122
        HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);

    
   
2123
    Path destfp = destf.getParent();

    
   
2124
    if (inheritPerms) {

    
   
2125
      try {

    
   
2126
        //if destf is an existing directory, rename just move the src file/dir under destf and

    
   
2127
        //destf itself will be the parent dir after the rename

    
   
2128
        if (fs.exists(destf) && fs.getFileStatus(destf).isDir()) {

    
   
2129
          destfp = destf;

    
   
2130
          LOG.debug("Renaming:" + destf.toString()  + " is an existing directory; " +

    
   
2131
          srcf.toString() + " will move under it.");

    
   
2132
        }

    
   
2133
      } catch (IOException ioe) {

    
   
2134
        throw new HiveException("Unable to check the status of dest " + destf, ioe);

    
   
2135
      }

    
   
2136
    }

    
   
2137

   

    
   
2138
    try {

    
   
2139
      success = fs.rename(srcf, destf);

    
   
2140
      LOG.debug("Renaming:" + srcf.toString() + " to " + destf.toString()  + ",Status:" + success);

    
   
2141
    } catch (IOException ioe) {

    
   
2142
      throw new HiveException("Unable to move source" + srcf + " to destination " + destf, ioe);

    
   
2143
    }

    
   
2144

   

    
   
2145
    if (success && inheritPerms) {

    
   
2146
      //use FsShell to change group and permissions recursively

    
   
2147
      FsShell fshell = new FsShell();

    
   
2148
      fshell.setConf(conf);

    
   
2149
      try {

    
   
2150
        FileStatus fstatus = fs.getFileStatus(destfp);

    
   
2151
        fshell.run(new String[]{"-chgrp", "-R", fstatus.getGroup(), destf.toString()});

    
   
2152
        fshell.run(new String[]{"-chmod", "-R",

    
   
2153
            Integer.toString(fstatus.getPermission().toShort(), 8), destf.toString()});

    
   
2154
      } catch (Exception e) {

    
   
2155
        throw new HiveException("Unable to set permissions of " + destf

    
   
2156
            + " to those of its parent " + destf.getParent(), e);

    
   
2157
      }

    
   
2158
    }

    
   
2159
    return success;

    
   
2160
  }

    
   
2161

   
2116
  static protected void copyFiles(HiveConf conf, Path srcf, Path destf, FileSystem fs)
2162
  static protected void copyFiles(HiveConf conf, Path srcf, Path destf, FileSystem fs)
2117
      throws HiveException {
2163
      throws HiveException {

    
   
2164
    boolean inheritPerms = HiveConf.getBoolVar(conf,

    
   
2165
        HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
2118
    try {
2166
    try {
2119
      // create the destination if it does not exist
2167
      // create the destination if it does not exist
2120
      if (!fs.exists(destf)) {
2168
      if (!fs.exists(destf)) {
2121
        fs.mkdirs(destf);
2169
        fs.mkdirs(destf);

    
   
2170
        if (inheritPerms) {

    
   
2171
          fs.setPermission(destf, fs.getFileStatus(destf.getParent()).getPermission());

    
   
2172
        }
2122
      }
2173
      }
2123
    } catch (IOException e) {
2174
    } catch (IOException e) {
2124
      throw new HiveException(
2175
      throw new HiveException(
2125
          "copyFiles: error while checking/creating destination directory!!!",
2176
          "copyFiles: error while checking/creating destination directory!!!",
2126
          e);
2177
          e);
[+20] [20] 16 lines
[+20] static protected void copyFiles(HiveConf conf, Path srcf, Path destf, FileSystem fs)
2143

    
   
2194

   
2144
    // move it, move it
2195
    // move it, move it
2145
    try {
2196
    try {
2146
      for (List<Path[]> sdpairs : result) {
2197
      for (List<Path[]> sdpairs : result) {
2147
        for (Path[] sdpair : sdpairs) {
2198
        for (Path[] sdpair : sdpairs) {
2148
          if (!fs.rename(sdpair[0], sdpair[1])) {
2199
          if (!renameFile(conf, sdpair[0], sdpair[1], fs)) {
2149
            throw new IOException("Cannot move " + sdpair[0] + " to " + sdpair[1]);
2200
            throw new IOException("Cannot move " + sdpair[0] + " to " + sdpair[1]);
2150
          }
2201
          }
2151
        }
2202
        }
2152
      }
2203
      }
2153
    } catch (IOException e) {
2204
    } catch (IOException e) {
[+20] [20] 18 lines
[+20] static protected void copyFiles(HiveConf conf, Path srcf, Path destf, FileSystem fs)
2172
   */
2223
   */
2173
  static protected void replaceFiles(Path srcf, Path destf, Path oldPath, HiveConf conf)
2224
  static protected void replaceFiles(Path srcf, Path destf, Path oldPath, HiveConf conf)
2174
      throws HiveException {
2225
      throws HiveException {
2175
    try {
2226
    try {
2176
      FileSystem fs = srcf.getFileSystem(conf);
2227
      FileSystem fs = srcf.getFileSystem(conf);

    
   
2228
      boolean inheritPerms = HiveConf.getBoolVar(conf,

    
   
2229
          HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
2177

    
   
2230

   
2178
      // check if srcf contains nested sub-directories
2231
      // check if srcf contains nested sub-directories
2179
      FileStatus[] srcs;
2232
      FileStatus[] srcs;
2180
      try {
2233
      try {
2181
        srcs = fs.globStatus(srcf);
2234
        srcs = fs.globStatus(srcf);
[+20] [20] 23 lines
[+20] static protected void replaceFiles(Path srcf, Path destf, Path oldPath, HiveConf conf)
2205
      }
2258
      }
2206

    
   
2259

   
2207
      // rename src directory to destf
2260
      // rename src directory to destf
2208
      if (srcs.length == 1 && srcs[0].isDir()) {
2261
      if (srcs.length == 1 && srcs[0].isDir()) {
2209
        // rename can fail if the parent doesn't exist
2262
        // rename can fail if the parent doesn't exist
2210
        if (!fs.exists(destf.getParent())) {
2263
        Path destfp = destf.getParent();
2211
          fs.mkdirs(destf.getParent());
2264
        if (!fs.exists(destfp)) {

    
   
2265
          boolean success = fs.mkdirs(destfp);

    
   
2266
          if (inheritPerms && success) {

    
   
2267
            fs.setPermission(destfp, fs.getFileStatus(destfp.getParent()).getPermission());

    
   
2268
          }
2212
        }
2269
        }
2213
        if (fs.exists(destf)) {
2270
        if (fs.exists(destf)) {
2214
          fs.delete(destf, true);
2271
          fs.delete(destf, true);
2215
        }
2272
        }
2216

    
   
2273

   
2217
        boolean b = fs.rename(srcs[0].getPath(), destf);
2274
        boolean b = renameFile(conf, srcs[0].getPath(), destf, fs);
2218
        if (!b) {
2275
        if (!b) {
2219
          throw new HiveException("Unable to move results from " + srcs[0].getPath()
2276
          throw new HiveException("Unable to move results from " + srcs[0].getPath()
2220
              + " to destination directory: " + destf);
2277
              + " to destination directory: " + destf);
2221
        }
2278
        }
2222
        LOG.debug("Renaming:" + srcf.toString() + " to " + destf.toString()  + ",Status:" + b);

   
2223
      } else { // srcf is a file or pattern containing wildcards
2279
      } else { // srcf is a file or pattern containing wildcards
2224
        if (!fs.exists(destf)) {
2280
        if (!fs.exists(destf)) {
2225
          fs.mkdirs(destf);
2281
          boolean success = fs.mkdirs(destf);

    
   
2282
          if (inheritPerms && success) {

    
   
2283
            fs.setPermission(destf, fs.getFileStatus(destf.getParent()).getPermission());

    
   
2284
          }
2226
        }
2285
        }
2227
        // srcs must be a list of files -- ensured by LoadSemanticAnalyzer
2286
        // srcs must be a list of files -- ensured by LoadSemanticAnalyzer
2228
        for (List<Path[]> sdpairs : result) {
2287
        for (List<Path[]> sdpairs : result) {
2229
          for (Path[] sdpair : sdpairs) {
2288
          for (Path[] sdpair : sdpairs) {
2230
            if (!fs.rename(sdpair[0], sdpair[1])) {
2289
            if (!renameFile(conf, sdpair[0], sdpair[1], fs)) {
2231
              throw new IOException("Error moving: " + sdpair[0] + " into: " + sdpair[1]);
2290
              throw new IOException("Error moving: " + sdpair[0] + " into: " + sdpair[1]);
2232
            }
2291
            }
2233
          }
2292
          }
2234
        }
2293
        }
2235
      }
2294
      }
[+20] [20] 206 lines
  1. ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java: Loading...