Review Board 1.7.22


[PIG-3255] Avoid extra byte array copy in streaming deserialize

Review Request #14030 - Created Sept. 9, 2013 and submitted

Rohini Palaniswamy
PIG-3255
Reviewers
pig
gates
pig
Added a new abstract class PigStreamingBase to have the new APIs for serializing and deserializing..Deprecated PigtoSTreaming and StreamingToPig
No new unit tests. TestStreaming and TestStreamingLocal cover the changes and the tests passes.
http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/PigStreamingBase.java
New File

    
   
1
/*

    
   
2
 * Licensed to the Apache Software Foundation (ASF) under one

    
   
3
 * or more contributor license agreements.  See the NOTICE file

    
   
4
 * distributed with this work for additional information

    
   
5
 * regarding copyright ownership.  The ASF licenses this file

    
   
6
 * to you under the Apache License, Version 2.0 (the

    
   
7
 * "License"); you may not use this file except in compliance

    
   
8
 * with the License.  You may obtain a copy of the License at

    
   
9
 *

    
   
10
 *     http://www.apache.org/licenses/LICENSE-2.0

    
   
11
 *

    
   
12
 * Unless required by applicable law or agreed to in writing, software

    
   
13
 * distributed under the License is distributed on an "AS IS" BASIS,

    
   
14
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

    
   
15
 * See the License for the specific language governing permissions and

    
   
16
 * limitations under the License.

    
   
17
 */

    
   
18
package org.apache.pig;

    
   
19

   

    
   
20
import java.io.IOException;

    
   
21

   

    
   
22
import org.apache.pig.classification.InterfaceAudience;

    
   
23
import org.apache.pig.classification.InterfaceStability;

    
   
24
import org.apache.pig.data.WritableByteArray;

    
   
25
import org.apache.pig.data.Tuple;

    
   
26

   

    
   
27
/**

    
   
28
 * The interface is used for the custom mapping of

    
   
29
 *    - a {@link Tuple} to a byte array. The byte array is fed to the stdin

    
   
30
 *      of the streaming process.

    
   
31
 *    - a byte array, received from the stdout of the streaming process,

    
   
32
 *      to a {@link Tuple}.

    
   
33
 *

    
   
34
 * This interface is designed to provide a common protocol for data exchange

    
   
35
 * between Pig runtime and streaming executables.

    
   
36
 *

    
   
37
 * Typically, a user implements this interface for a particular type of stream

    
   
38
 * command and specifies the implementation class in the Pig DEFINE statement.

    
   
39
 *

    
   
40
 * @since Pig 0.12

    
   
41
 */

    
   
42
@InterfaceAudience.Public

    
   
43
@InterfaceStability.Stable

    
   
44
public abstract class PigStreamingBase implements PigToStream, StreamToPig {

    
   
45

   

    
   
46
    @Override

    
   
47
    @Deprecated

    
   
48
    public final byte[] serialize(Tuple t) throws IOException {

    
   
49
        WritableByteArray data = serializeToBytes(t);

    
   
50
        if (data.getLength() == data.getData().length) {

    
   
51
            return data.getData();

    
   
52
        } else {

    
   
53
            byte[] buf = new byte[data.getLength()];

    
   
54
            System.arraycopy(data.getData(), 0, buf, 0, data.getLength());

    
   
55
            return buf;

    
   
56
        }

    
   
57
    }

    
   
58

   

    
   
59
    /**

    
   
60
     * Given a tuple, produce an array of bytes to be passed to the streaming

    
   
61
     * executable.

    
   
62
     * @param t Tuple to serialize

    
   
63
     * @return Serialized form of the tuple

    
   
64
     * @throws IOException

    
   
65
     */

    
   
66
    public abstract WritableByteArray serializeToBytes(Tuple t) throws IOException;

    
   
67

   

    
   
68
    @Override

    
   
69
    @Deprecated

    
   
70
    public final Tuple deserialize(byte[] bytes) throws IOException {

    
   
71
        return deserialize(bytes, 0, bytes.length);

    
   
72
    }

    
   
73

   

    
   
74
    /**

    
   
75
     * Given a byte array from a streaming executable, produce a tuple.

    
   
76
     * @param bytes  bytes to deserialize.

    
   
77
     * @param offset the offset in the byte array from which to deserialize.

    
   
78
     * @param length the number of bytes from the offset of the byte array to deserialize.

    
   
79
     * @return Data as a Pig Tuple.

    
   
80
     * @throws IOException

    
   
81
     */

    
   
82
    public abstract Tuple deserialize(byte[] bytes, int offset, int length) throws IOException;

    
   
83

   

    
   
84
    /**

    
   
85
     * This will be called on the front end during planning and not on the back

    
   
86
     * end during execution.

    
   
87
     *

    
   
88
     * @return the {@link LoadCaster} associated with this object, or null if

    
   
89
     * there is no such LoadCaster.

    
   
90
     * @throws IOException if there is an exception during LoadCaster

    
   
91
     */

    
   
92
    public abstract LoadCaster getLoadCaster() throws IOException;

    
   
93

   

    
   
94
}
http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/PigToStream.java
Revision 1523070 New Change
 
http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/StreamToPig.java
Revision 1523070 New Change
 
http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/builtin/PigStreaming.java
Revision 1523070 New Change
 
http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/data/WritableByteArray.java
New File
 
http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/impl/streaming/InputHandler.java
Revision 1523070 New Change
 
http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/impl/streaming/OutputHandler.java
Revision 1523070 New Change
 
http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/impl/util/StorageUtil.java
Revision 1523070 New Change
 
  1. http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/PigStreamingBase.java: Loading...
  2. http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/PigToStream.java: Loading...
  3. http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/StreamToPig.java: Loading...
  4. http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/builtin/PigStreaming.java: Loading...
  5. http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/data/WritableByteArray.java: Loading...
  6. http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/impl/streaming/InputHandler.java: Loading...
  7. http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/impl/streaming/OutputHandler.java: Loading...
  8. http://svn.apache.org/repos/asf/pig/trunk/src/org/apache/pig/impl/util/StorageUtil.java: Loading...