Review Board 1.7.22


Column stats: LOW_VALUE (or HIGH_VALUE) will always be 0.0000 if all the column values are larger than 0.0 (or if all the column values are smaller than 0.0)

Review Request #11172 - Created May 15, 2013 and updated

Zhuoluo Yang
trunk
HIVE-4561
Reviewers
hive
ashutoshc, caofangkun, carl, cws, shreepadma
hive
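An initialization error: the min and max of the long and double stats aggregators start at 0, so 0 is reported whenever every value lies on one side of it.
Initialize min and max for both double and long correctly.
Would you please review this and assign the issue to me?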
ant test -Dtestcase=TestCliDriver -Dqfile=compute_stats_long.q
ant test -Dtestcase=TestCliDriver -Dqfile=compute_stats_double.q

done.
http://svn.apache.org/repos/asf/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
Revision 1489292 New Change
[20] 463 lines
[+20] [+] public void initNDVEstimator(LongStatsAgg aggBuffer, int numBitVectors) {
464

    
   
464

   
465
    @Override
465
    @Override
466
    public void reset(AggregationBuffer agg) throws HiveException {
466
    public void reset(AggregationBuffer agg) throws HiveException {
467
      LongStatsAgg myagg = (LongStatsAgg) agg;
467
      LongStatsAgg myagg = (LongStatsAgg) agg;
468
      myagg.columnType = new String("Long");
468
      myagg.columnType = new String("Long");
469
      myagg.min = 0;
469
      // We set Long.MAX_VALUE here instead of 0. In case of
470
      myagg.max = 0;
470
      // all the long values are positive, we can get the right min.

    
   
471
      myagg.min = Long.MAX_VALUE;

    
   
472
      // We set Long.MIN_VALUE here instead of 0. In case of

    
   
473
      // all the long values are negative, we can get the right max.

    
   
474
      myagg.max = Long.MIN_VALUE;
471
      myagg.countNulls = 0;
475
      myagg.countNulls = 0;
472
      myagg.firstItem = true;
476
      myagg.firstItem = true;
473
    }
477
    }
474

    
   
478

   
475
    boolean warned = false;
479
    boolean warned = false;
[+20] [20] 136 lines
[+20] [+] public Object terminate(AggregationBuffer agg) throws HiveException {
612
        numDV = myagg.numDV.estimateNumDistinctValues();
616
        numDV = myagg.numDV.estimateNumDistinctValues();
613
      }
617
      }
614

    
   
618

   
615
      // Serialize the result struct
619
      // Serialize the result struct
616
      ((Text) result[0]).set(myagg.columnType);
620
      ((Text) result[0]).set(myagg.columnType);

    
   
621
      if (myagg.min < Long.MAX_VALUE) {
617
      ((LongWritable) result[1]).set(myagg.min);
622
        ((LongWritable) result[1]).set(myagg.min);

    
   
623
      } else {

    
   
624
        result[1] = null;

    
   
625
      }

    
   
626
      if (myagg.max > Long.MIN_VALUE) {
618
      ((LongWritable) result[2]).set(myagg.max);
627
        ((LongWritable) result[2]).set(myagg.max);

    
   
628
      } else {

    
   
629
        result[2] = null;

    
   
630
      }
619
      ((LongWritable) result[3]).set(myagg.countNulls);
631
      ((LongWritable) result[3]).set(myagg.countNulls);
620
      ((LongWritable) result[4]).set(numDV);
632
      ((LongWritable) result[4]).set(numDV);
621

    
   
633

   
622
      return result;
634
      return result;
623
    }
635
    }
[+20] [20] 160 lines
[+20] [+] public void initNDVEstimator(DoubleStatsAgg aggBuffer, int numBitVectors) {
784

    
   
796

   
785
    @Override
797
    @Override
786
    public void reset(AggregationBuffer agg) throws HiveException {
798
    public void reset(AggregationBuffer agg) throws HiveException {
787
      DoubleStatsAgg myagg = (DoubleStatsAgg) agg;
799
      DoubleStatsAgg myagg = (DoubleStatsAgg) agg;
788
      myagg.columnType = new String("Double");
800
      myagg.columnType = new String("Double");
789
      myagg.min = 0.0;
801
      // We set Double.POSITIVE_INFINITY here instead of 0. In case of
790
      myagg.max = 0.0;
802
      // all the double values are positive, we can get the right min.

    
   
803
      myagg.min = Double.POSITIVE_INFINITY;

    
   
804
      // We set Double.NEGATIVE_INFINITY here instead of 0. In case of

    
   
805
      // all the double values are negative, we can get the right max.

    
   
806
      myagg.max = Double.NEGATIVE_INFINITY;
791
      myagg.countNulls = 0;
807
      myagg.countNulls = 0;
792
      myagg.firstItem = true;
808
      myagg.firstItem = true;
793
    }
809
    }
794

    
   
810

   
795
    boolean warned = false;
811
    boolean warned = false;
[+20] [20] 137 lines
[+20] [+] public Object terminate(AggregationBuffer agg) throws HiveException {
933
        numDV = myagg.numDV.estimateNumDistinctValues();
949
        numDV = myagg.numDV.estimateNumDistinctValues();
934
      }
950
      }
935

    
   
951

   
936
      // Serialize the result struct
952
      // Serialize the result struct
937
      ((Text) result[0]).set(myagg.columnType);
953
      ((Text) result[0]).set(myagg.columnType);

    
   
954
      if (myagg.min < Double.POSITIVE_INFINITY) {
938
      ((DoubleWritable) result[1]).set(myagg.min);
955
        ((DoubleWritable) result[1]).set(myagg.min);

    
   
956
      } else {

    
   
957
        result[1] = null;

    
   
958
      }

    
   
959
      if (myagg.max > Double.NEGATIVE_INFINITY) {
939
      ((DoubleWritable) result[2]).set(myagg.max);
960
        ((DoubleWritable) result[2]).set(myagg.max);

    
   
961
      } else {

    
   
962
        result[2] = null;

    
   
963
      }
940
      ((LongWritable) result[3]).set(myagg.countNulls);
964
      ((LongWritable) result[3]).set(myagg.countNulls);
941
      ((LongWritable) result[4]).set(numDV);
965
      ((LongWritable) result[4]).set(numDV);
942

    
   
966

   
943
      return result;
967
      return result;
944
    }
968
    }
[+20] [20] 618 lines
http://svn.apache.org/repos/asf/hive/trunk/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out
Revision 1489292 New Change
 
http://svn.apache.org/repos/asf/hive/trunk/ql/src/test/results/clientpositive/compute_stats_long.q.out
Revision 1489292 New Change
 
  1. http://svn.apache.org/repos/asf/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java: Loading...
  2. http://svn.apache.org/repos/asf/hive/trunk/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out: Loading...
  3. http://svn.apache.org/repos/asf/hive/trunk/ql/src/test/results/clientpositive/compute_stats_long.q.out: Loading...