Review Board 1.7.22


Add copyBytes method to Text and BytesWritable to improve usability

Review Request #182 - Created Dec. 15, 2010 and updated

Owen O'Malley
trunk
https://issues.apache.org/jira/browse/HADOOP-6298
Reviewers
hadoop-common
hadoop-common
Users would like a method that returns a byte array of precisely the right length, even if it requires a data copy.
Added unit tests and passed Hudson
http://svn.apache.org/repos/asf/hadoop/common/trunk/src/java/org/apache/hadoop/io/BytesWritable.java
Revision 1049968 New Change
1
/**
1
/**
2
 * Licensed to the Apache Software Foundation (ASF) under one
2
 * Licensed to the Apache Software Foundation (ASF) under one
3
 * or more contributor license agreements.  See the NOTICE file
3
 * or more contributor license agreements.  See the NOTICE file
4
 * distributed with this work for additional information
4
 * distributed with this work for additional information
5
 * regarding copyright ownership.  The ASF licenses this file
5
 * regarding copyright ownership.  The ASF licenses this file
6
 * to you under the Apache License, Version 2.0 (the
6
 * to you under the Apache License, Version 2.0 (the
7
 * "License"); you may not use this file except in compliance
7
 * "License"); you may not use this file except in compliance
8
 * with the License.  You may obtain a copy of the License at
8
 * with the License.  You may obtain a copy of the License at
9
 *
9
 *
10
 *     http://www.apache.org/licenses/LICENSE-2.0
10
 *     http://www.apache.org/licenses/LICENSE-2.0
11
 *
11
 *
12
 * Unless required by applicable law or agreed to in writing, software
12
 * Unless required by applicable law or agreed to in writing, software
13
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * distributed under the License is distributed on an "AS IS" BASIS,
14
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 * See the License for the specific language governing permissions and
15
 * See the License for the specific language governing permissions and
16
 * limitations under the License.
16
 * limitations under the License.
17
 */
17
 */
18

    
   
18

   
19
package org.apache.hadoop.io;
19
package org.apache.hadoop.io;
20

    
   
20

   
21
import java.io.IOException;
21
import java.io.IOException;
22
import java.io.DataInput;
22
import java.io.DataInput;
23
import java.io.DataOutput;
23
import java.io.DataOutput;
24

    
   
24

   
25
import org.apache.commons.logging.Log;

   
26
import org.apache.commons.logging.LogFactory;

   
27
import org.apache.hadoop.classification.InterfaceAudience;
25
import org.apache.hadoop.classification.InterfaceAudience;
28
import org.apache.hadoop.classification.InterfaceStability;
26
import org.apache.hadoop.classification.InterfaceStability;
29

    
   
27

   
30
/** 
28
/** 
31
 * A byte sequence that is usable as a key or value.
29
 * A byte sequence that is usable as a key or value.
32
 * It is resizable and distinguishes between the size of the sequence and
30
 * It is resizable and distinguishes between the size of the sequence and
33
 * the current capacity. The hash function is the front of the md5 of the 
31
 * the current capacity. The hash function is the front of the md5 of the 
34
 * buffer. The sort order is the same as memcmp.
32
 * buffer. The sort order is the same as memcmp.
35
 */
33
 */
36
@InterfaceAudience.Public
34
@InterfaceAudience.Public
37
@InterfaceStability.Stable
35
@InterfaceStability.Stable
38
public class BytesWritable extends BinaryComparable
36
public class BytesWritable extends BinaryComparable
39
    implements WritableComparable<BinaryComparable> {
37
    implements WritableComparable<BinaryComparable> {
40
  private static final Log LOG = LogFactory.getLog(BytesWritable.class);

   
41
  private static final int LENGTH_BYTES = 4;
38
  private static final int LENGTH_BYTES = 4;
42
  private static final byte[] EMPTY_BYTES = {};
39
  private static final byte[] EMPTY_BYTES = {};
43

    
   
40

   
44
  private int size;
41
  private int size;
45
  private byte[] bytes;
42
  private byte[] bytes;
46
  
43
  
47
  /**
44
  /**
48
   * Create a zero-size sequence.
45
   * Create a zero-size sequence.
49
   */
46
   */
50
  public BytesWritable() {this(EMPTY_BYTES);}
47
  public BytesWritable() {this(EMPTY_BYTES);}
51
  
48
  
52
  /**
49
  /**
53
   * Create a BytesWritable using the byte array as the initial value.
50
   * Create a BytesWritable using the byte array as the initial value.
54
   * @param bytes This array becomes the backing storage for the object.
51
   * @param bytes This array becomes the backing storage for the object.
55
   */
52
   */
56
  public BytesWritable(byte[] bytes) {
53
  public BytesWritable(byte[] bytes) {
57
    this.bytes = bytes;
54
    this.bytes = bytes;
58
    this.size = bytes.length;
55
    this.size = bytes.length;
59
  }
56
  }
60
  
57
  
61
  /**
58
  /**
62
   * Get the data from the BytesWritable.
59
   * Get a copy of the bytes that is exactly the length of the data.

    
   
60
   */

    
   
61
  public byte[] copyBytes() {

    
   
62
    byte[] result = new byte[size];

    
   
63
    System.arraycopy(bytes, 0, result, 0, size);

    
   
64
    return result;

    
   
65
  }

    
   
66
  

    
   
67
  /**

    
   
68
   * Get the data backing the BytesWritable. Please use copyBytes() if you

    
   
69
   * need the returned array to be the precise length of the data.
63
   * @return The data is only valid between 0 and getLength() - 1.
70
   * @return The data is only valid between 0 and getLength() - 1.
64
   */
71
   */
65
  public byte[] getBytes() {
72
  public byte[] getBytes() {
66
    return bytes;
73
    return bytes;
67
  }
74
  }
68

    
   
75

   
69
  /**
76
  /**
70
   * Get the data from the BytesWritable.
77
   * Get the data from the BytesWritable.
71
   * @deprecated Use {@link #getBytes()} instead.
78
   * @deprecated Use {@link #getBytes()} instead.
72
   */
79
   */
73
  @Deprecated
80
  @Deprecated
74
  public byte[] get() {
81
  public byte[] get() {
75
    return getBytes();
82
    return getBytes();
76
  }
83
  }
77

    
   
84

   
78
  /**
85
  /**
79
   * Get the current size of the buffer.
86
   * Get the current size of the buffer.
80
   */
87
   */
81
  public int getLength() {
88
  public int getLength() {
82
    return size;
89
    return size;
83
  }
90
  }
84

    
   
91

   
85
  /**
92
  /**
86
   * Get the current size of the buffer.
93
   * Get the current size of the buffer.
87
   * @deprecated Use {@link #getLength()} instead.
94
   * @deprecated Use {@link #getLength()} instead.
88
   */
95
   */
89
  @Deprecated
96
  @Deprecated
90
  public int getSize() {
97
  public int getSize() {
91
    return getLength();
98
    return getLength();
92
  }
99
  }
93
  
100
  
94
  /**
101
  /**
95
   * Change the size of the buffer. The values in the old range are preserved
102
   * Change the size of the buffer. The values in the old range are preserved
96
   * and any new values are undefined. The capacity is changed if it is 
103
   * and any new values are undefined. The capacity is changed if it is 
97
   * necessary.
104
   * necessary.
98
   * @param size The new number of bytes
105
   * @param size The new number of bytes
99
   */
106
   */
100
  public void setSize(int size) {
107
  public void setSize(int size) {
101
    if (size > getCapacity()) {
108
    if (size > getCapacity()) {
102
      setCapacity(size * 3 / 2);
109
      setCapacity(size * 3 / 2);
103
    }
110
    }
104
    this.size = size;
111
    this.size = size;
105
  }
112
  }
106
  
113
  
107
  /**
114
  /**
108
   * Get the capacity, which is the maximum size that could be handled without
115
   * Get the capacity, which is the maximum size that could be handled without
109
   * resizing the backing storage.
116
   * resizing the backing storage.
110
   * @return The number of bytes
117
   * @return The number of bytes
111
   */
118
   */
112
  public int getCapacity() {
119
  public int getCapacity() {
113
    return bytes.length;
120
    return bytes.length;
114
  }
121
  }
115
  
122
  
116
  /**
123
  /**
117
   * Change the capacity of the backing storage.
124
   * Change the capacity of the backing storage.
118
   * The data is preserved.
125
   * The data is preserved.
119
   * @param new_cap The new capacity in bytes.
126
   * @param new_cap The new capacity in bytes.
120
   */
127
   */
121
  public void setCapacity(int new_cap) {
128
  public void setCapacity(int new_cap) {
122
    if (new_cap != getCapacity()) {
129
    if (new_cap != getCapacity()) {
123
      byte[] new_data = new byte[new_cap];
130
      byte[] new_data = new byte[new_cap];
124
      if (new_cap < size) {
131
      if (new_cap < size) {
125
        size = new_cap;
132
        size = new_cap;
126
      }
133
      }
127
      if (size != 0) {
134
      if (size != 0) {
128
        System.arraycopy(bytes, 0, new_data, 0, size);
135
        System.arraycopy(bytes, 0, new_data, 0, size);
129
      }
136
      }
130
      bytes = new_data;
137
      bytes = new_data;
131
    }
138
    }
132
  }
139
  }
133

    
   
140

   
134
  /**
141
  /**
135
   * Set the BytesWritable to the contents of the given newData.
142
   * Set the BytesWritable to the contents of the given newData.
136
   * @param newData the value to set this BytesWritable to.
143
   * @param newData the value to set this BytesWritable to.
137
   */
144
   */
138
  public void set(BytesWritable newData) {
145
  public void set(BytesWritable newData) {
139
    set(newData.bytes, 0, newData.size);
146
    set(newData.bytes, 0, newData.size);
140
  }
147
  }
141

    
   
148

   
142
  /**
149
  /**
143
   * Set the value to a copy of the given byte range
150
   * Set the value to a copy of the given byte range
144
   * @param newData the new values to copy in
151
   * @param newData the new values to copy in
145
   * @param offset the offset in newData to start at
152
   * @param offset the offset in newData to start at
146
   * @param length the number of bytes to copy
153
   * @param length the number of bytes to copy
147
   */
154
   */
148
  public void set(byte[] newData, int offset, int length) {
155
  public void set(byte[] newData, int offset, int length) {
149
    setSize(0);
156
    setSize(0);
150
    setSize(length);
157
    setSize(length);
151
    System.arraycopy(newData, offset, bytes, 0, size);
158
    System.arraycopy(newData, offset, bytes, 0, size);
152
  }
159
  }
153

    
   
160

   
154
  // inherit javadoc
161
  // inherit javadoc
155
  public void readFields(DataInput in) throws IOException {
162
  public void readFields(DataInput in) throws IOException {
156
    setSize(0); // clear the old data
163
    setSize(0); // clear the old data
157
    setSize(in.readInt());
164
    setSize(in.readInt());
158
    in.readFully(bytes, 0, size);
165
    in.readFully(bytes, 0, size);
159
  }
166
  }
160
  
167
  
161
  // inherit javadoc
168
  // inherit javadoc
162
  public void write(DataOutput out) throws IOException {
169
  public void write(DataOutput out) throws IOException {
163
    out.writeInt(size);
170
    out.writeInt(size);
164
    out.write(bytes, 0, size);
171
    out.write(bytes, 0, size);
165
  }
172
  }
166
  
173
  
167
  public int hashCode() {
174
  public int hashCode() {
168
    return super.hashCode();
175
    return super.hashCode();
169
  }
176
  }
170

    
   
177

   
171
  /**
178
  /**
172
   * Are the two byte sequences equal?
179
   * Are the two byte sequences equal?
173
   */
180
   */
174
  public boolean equals(Object right_obj) {
181
  public boolean equals(Object right_obj) {
175
    if (right_obj instanceof BytesWritable)
182
    if (right_obj instanceof BytesWritable)
176
      return super.equals(right_obj);
183
      return super.equals(right_obj);
177
    return false;
184
    return false;
178
  }
185
  }
179

    
   
186

   
180
  /**
187
  /**
181
   * Generate the stream of bytes as hex pairs separated by ' '.
188
   * Generate the stream of bytes as hex pairs separated by ' '.
182
   */
189
   */
183
  public String toString() { 
190
  public String toString() { 
184
    StringBuilder sb = new StringBuilder(3*size);
191
    StringBuilder sb = new StringBuilder(3*size);
185
    for (int idx = 0; idx < size; idx++) {
192
    for (int idx = 0; idx < size; idx++) {
186
      // if not the first, put a blank separator in
193
      // if not the first, put a blank separator in
187
      if (idx != 0) {
194
      if (idx != 0) {
188
        sb.append(' ');
195
        sb.append(' ');
189
      }
196
      }
190
      String num = Integer.toHexString(0xff & bytes[idx]);
197
      String num = Integer.toHexString(0xff & bytes[idx]);
191
      // if it is only one digit, add a leading 0.
198
      // if it is only one digit, add a leading 0.
192
      if (num.length() < 2) {
199
      if (num.length() < 2) {
193
        sb.append('0');
200
        sb.append('0');
194
      }
201
      }
195
      sb.append(num);
202
      sb.append(num);
196
    }
203
    }
197
    return sb.toString();
204
    return sb.toString();
198
  }
205
  }
199

    
   
206

   
200
  /** A Comparator optimized for BytesWritable. */ 
207
  /** A Comparator optimized for BytesWritable. */ 
201
  public static class Comparator extends WritableComparator {
208
  public static class Comparator extends WritableComparator {
202
    public Comparator() {
209
    public Comparator() {
203
      super(BytesWritable.class);
210
      super(BytesWritable.class);
204
    }
211
    }
205
    
212
    
206
    /**
213
    /**
207
     * Compare the buffers in serialized form.
214
     * Compare the buffers in serialized form.
208
     */
215
     */
209
    public int compare(byte[] b1, int s1, int l1,
216
    public int compare(byte[] b1, int s1, int l1,
210
                       byte[] b2, int s2, int l2) {
217
                       byte[] b2, int s2, int l2) {
211
      return compareBytes(b1, s1+LENGTH_BYTES, l1-LENGTH_BYTES, 
218
      return compareBytes(b1, s1+LENGTH_BYTES, l1-LENGTH_BYTES, 
212
                          b2, s2+LENGTH_BYTES, l2-LENGTH_BYTES);
219
                          b2, s2+LENGTH_BYTES, l2-LENGTH_BYTES);
213
    }
220
    }
214
  }
221
  }
215
  
222
  
216
  static {                                        // register this comparator
223
  static {                                        // register this comparator
217
    WritableComparator.define(BytesWritable.class, new Comparator());
224
    WritableComparator.define(BytesWritable.class, new Comparator());
218
  }
225
  }
219
  
226
  
220
}
227
}
http://svn.apache.org/repos/asf/hadoop/common/trunk/src/java/org/apache/hadoop/io/Text.java
Revision 1049968 New Change
 
http://svn.apache.org/repos/asf/hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestBytesWritable.java
Revision 1049968 New Change
 
http://svn.apache.org/repos/asf/hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestText.java
Revision 1049968 New Change
 
  1. http://svn.apache.org/repos/asf/hadoop/common/trunk/src/java/org/apache/hadoop/io/BytesWritable.java: Loading...
  2. http://svn.apache.org/repos/asf/hadoop/common/trunk/src/java/org/apache/hadoop/io/Text.java: Loading...
  3. http://svn.apache.org/repos/asf/hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestBytesWritable.java: Loading...
  4. http://svn.apache.org/repos/asf/hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestText.java: Loading...