package com.alibaba.dt.onedata3.profiling.udaf;

import com.alibaba.dt.onedata3.profiling.udaf.FrequentItem;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.clearspring.analytics.stream.quantile.TDigest;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

/* loaded from: input_file:com/alibaba/dt/onedata3/profiling/udaf/ColumnProfilingToJson.class */
public class ColumnProfilingToJson {
    /** Profiling state that this instance serializes to JSON; assigned once in the constructor. */
    private final ColumnProfiling columnProfiling;

    /** Number of buckets emitted for a histogram. */
    private static final int HISTOGRAMS_CNT = 10;

    /** Separator placed between the lower and upper bound in a histogram bucket label. */
    private static final String HISTOGRAMS_SPLIT_CHAR = "|";

    /** Number of consecutive string lengths merged into one bucket of the grouped string-length histogram. */
    private static final int HISTOGRAMS_STRING_LEN_STEP = 7;

    /** Cardinality / record-count ratio above which {@code repair} overwrites every top-10 item count with 1. */
    private static final double MIN_UNIQUE_KEY_RATE_REMOVE_TOP10_FREQUENT_ITEM = 0.95d;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/alibaba/dt/onedata3/profiling/udaf/ColumnProfilingToJson$Histograms.class */
    public static class Histograms {
        /** Number of quantile probes (0.00, 0.01, ..., 1.00) used to spread counts over the buckets. */
        private static final int QUANTILES_CNT = 101;

        /** Number of equal-width buckets between the 1% and 99% quantiles; the two remaining buckets are the tails. */
        private static final int RANGE_HISTOGRAMS_CNT = ColumnProfilingToJson.HISTOGRAMS_CNT - 2;

        /**
         * One bucket of a numeric histogram: a value range plus the record
         * count accumulated for that range.
         */
        public static class Node {
            /** Lower bound of the bucket. */
            double min;

            /** Upper bound of the bucket. */
            double max;

            /** Record count accumulated for this bucket; starts at 0. */
            long cnt;

            /** Private: instances can only be created from within the enclosing top-level class. */
            private Node() {
                // nothing to initialise; fields keep their default values
            }
        }

        /** Private constructor: this class only exposes static helpers and is not meant to be instantiated. */
        private Histograms() {
            // no instance state
        }

        /**
         * Builds the histogram of a numeric column from its t-digest and
         * renders it as JSON.
         *
         * @param tDigest digest of the column values
         * @param d       lower bound used for the first bucket
         * @param d2      upper bound used for the last bucket
         * @param i       data type code; selects how bucket labels are formatted
         * @return one single-entry JSON object per bucket
         */
        public static JSONArray getJsonArray(TDigest tDigest, double d, double d2, int i) {
            ArrayList<Node> buckets = getArray(tDigest, d, d2);
            return toJsonArray(buckets, i);
        }

        /**
         * Builds the string-length histogram from a per-length counter array.
         *
         * <p>When at most {@code HISTOGRAMS_CNT} entries are positive, each
         * positive entry gets its own bucket; otherwise the entries are
         * grouped into fixed-width ranges.
         *
         * @param jArr counters indexed by string length
         * @param i    maximum string length, forwarded to the bucket builders
         * @return one single-entry JSON object per bucket
         */
        public static JSONArray getJsonArray(long[] jArr, int i) {
            int populated = 0;
            for (int idx = 0; idx < jArr.length; idx++) {
                if (jArr[idx] > 0) {
                    populated++;
                }
            }
            if (populated > ColumnProfilingToJson.HISTOGRAMS_CNT) {
                return getJsonArrayMultiLength(jArr, i);
            }
            return getJsonArrayFewLength(jArr, i);
        }

        /**
         * Groups per-length counters into {@code HISTOGRAMS_CNT} buckets.
         *
         * <p>Bucket 0 holds the counter for index 0 only. Every following
         * bucket covers {@code HISTOGRAMS_STRING_LEN_STEP} consecutive
         * indices, and the last bucket additionally absorbs every index
         * beyond its nominal range.
         *
         * @param jArr counters indexed by string length; must not be empty
         * @param i    maximum string length; used for the upper bound of the last label
         * @return one {@code {"<from>|<to>": count}} object per bucket
         */
        private static JSONArray getJsonArrayMultiLength(long[] jArr, int i) {
            final int step = ColumnProfilingToJson.HISTOGRAMS_STRING_LEN_STEP;
            final String separator = ColumnProfilingToJson.HISTOGRAMS_SPLIT_CHAR;
            final long[] buckets = new long[ColumnProfilingToJson.HISTOGRAMS_CNT];
            final int lastBucket = buckets.length - 1;

            buckets[0] = jArr[0];
            for (int len = 1; len < jArr.length; len++) {
                int target = ((len - 1) / step) + 1;
                if (target > lastBucket) {
                    // Overflowing lengths all land in the final bucket.
                    target = lastBucket;
                }
                buckets[target] += jArr[len];
            }

            JSONArray result = new JSONArray();
            for (int b = 0; b < buckets.length; b++) {
                String label;
                if (b == 0) {
                    label = "0" + separator + "0";
                } else {
                    int from = ((b - 1) * step) + 1;
                    int to = (b == lastBucket) ? Math.max(i, jArr.length) : (from + step) - 1;
                    label = from + separator + to;
                }
                JSONObject entry = new JSONObject();
                entry.put(label, Long.valueOf(buckets[b]));
                result.add(entry);
            }
            return result;
        }

        /**
         * Emits one bucket per positive counter, then pads the result with
         * zero-count buckets until it holds {@code HISTOGRAMS_CNT} entries.
         *
         * <p>Padding labels continue from the index of the last positive
         * counter (or from 0 when there is none).
         *
         * @param jArr counters indexed by string length
         * @param i    maximum string length; not used by this variant
         * @return one {@code {"<n>|<n>": count}} object per bucket
         */
        private static JSONArray getJsonArrayFewLength(long[] jArr, int i) {
            final String separator = ColumnProfilingToJson.HISTOGRAMS_SPLIT_CHAR;
            JSONArray result = new JSONArray();
            int lastPopulated = 0;

            for (int len = 0; len < jArr.length; len++) {
                if (jArr[len] <= 0) {
                    continue;
                }
                JSONObject entry = new JSONObject();
                entry.put(len + separator + len, Long.valueOf(jArr[len]));
                result.add(entry);
                lastPopulated = len;
            }

            int next = lastPopulated + 1;
            while (result.size() < ColumnProfilingToJson.HISTOGRAMS_CNT) {
                JSONObject filler = new JSONObject();
                filler.put(next + separator + next, Integer.valueOf(0));
                result.add(filler);
                next++;
            }
            return result;
        }

        /**
         * Derives histogram buckets from a t-digest.
         *
         * <p>Layout of the returned list: a low-tail bucket from {@code d} to
         * the 1% quantile, {@code RANGE_HISTOGRAMS_CNT} equal-width buckets
         * between the 1% and 99% quantiles, and a high-tail bucket from the
         * 99% quantile to {@code d2}. Counts are estimated by probing
         * {@code QUANTILES_CNT} evenly spaced quantiles and crediting a fixed
         * share of the digest size to the bucket each probe falls into.
         *
         * @param tDigest digest of the column values
         * @param d       lower bound of the first bucket
         * @param d2      upper bound of the last bucket
         * @return the buckets in ascending order
         */
        private static ArrayList<Node> getArray(TDigest tDigest, double d, double d2) {
            final double lowCut = tDigest.quantile(0.01d);
            final double highCut = tDigest.quantile(0.99d);
            final double width = (highCut - lowCut) / RANGE_HISTOGRAMS_CNT;

            ArrayList<Node> buckets = new ArrayList<>();

            Node lowTail = new Node();
            lowTail.min = d;
            lowTail.max = lowCut;
            buckets.add(lowTail);

            for (int k = 0; k < RANGE_HISTOGRAMS_CNT; k++) {
                Node inner = new Node();
                inner.min = lowCut + (width * k);
                inner.max = inner.min + width;
                buckets.add(inner);
            }

            Node highTail = new Node();
            highTail.min = highCut;
            highTail.max = d2;
            buckets.add(highTail);

            // NOTE(review): this is an integer division, so a digest with fewer
            // than (QUANTILES_CNT - 1) points credits 0 per probe - confirm intended.
            final long perProbe = tDigest.size() / (QUANTILES_CNT - 1);
            for (int probe = 0; probe < QUANTILES_CNT; probe++) {
                double value = tDigest.quantile(0.01d * probe);
                Node target;
                if (value <= lowCut) {
                    target = lowTail;
                } else if (value > highCut) {
                    target = highTail;
                } else {
                    // +1 skips the low-tail bucket at index 0.
                    target = buckets.get(((int) ((value - lowCut) / width)) + 1);
                }
                target.cnt += perProbe;
            }
            return buckets;
        }

        /**
         * Renders histogram buckets as a JSON array of single-entry objects
         * {@code {"<lower>|<upper>": count}}.
         *
         * <p>DOUBLE bounds are printed with two decimals; the formatting is
         * pinned to {@link Locale#ROOT} so the labels do not change with the
         * JVM default locale (decimal comma, localized digits). BIGINT bounds
         * are truncated to {@code long}, and from the second bucket on the
         * lower bound is moved to one past the previous bucket's upper bound
         * (capped at the bucket's own upper bound) so integer ranges do not
         * overlap. Any other type code yields the label {@code "0|0"}.
         *
         * @param arrayList buckets in ascending order
         * @param i         data type code, compared against {@code ColumnProfiling.DOUBLE} / {@code BIGINT}
         * @return one JSON object per bucket, in input order
         */
        private static JSONArray toJsonArray(ArrayList<Node> arrayList, int i) {
            final String separator = ColumnProfilingToJson.HISTOGRAMS_SPLIT_CHAR;
            JSONArray result = new JSONArray();
            for (int idx = 0; idx < arrayList.size(); idx++) {
                Node bucket = arrayList.get(idx);
                String label = "0" + separator + "0";
                if (i == ColumnProfiling.DOUBLE) {
                    label = String.format(Locale.ROOT, "%.02f", Double.valueOf(bucket.min))
                            + separator
                            + String.format(Locale.ROOT, "%.02f", Double.valueOf(bucket.max));
                } else if (i == ColumnProfiling.BIGINT) {
                    long lower = (long) bucket.min;
                    long upper = (long) bucket.max;
                    if (idx > 0) {
                        lower = Math.min(((long) arrayList.get(idx - 1).max) + 1, upper);
                    }
                    label = String.valueOf(lower) + separator + String.valueOf(upper);
                }
                JSONObject entry = new JSONObject();
                entry.put(label, Long.valueOf(bucket.cnt));
                result.add(entry);
            }
            return result;
        }
    }

    /**
     * Creates a serializer for the given profiling state.
     *
     * @param columnProfiling profiling state to serialize; stored by reference, not copied
     */
    public ColumnProfilingToJson(ColumnProfiling columnProfiling) {
        this.columnProfiling = columnProfiling;
    }

    /**
     * Serializes the column profile to a JSON string.
     *
     * <p>The document contains the base info, {@code string_length} (STRING
     * columns only), {@code statistic} (BIGINT / DOUBLE columns only),
     * {@code top10_frequent_item}, {@code quantiles} (only when the digest
     * holds more than one point) and {@code histograms}.
     *
     * @param z when {@code true}, {@link #repair(JSONObject)} is applied before serializing
     * @return the JSON text of the profile
     */
    public String build(boolean z) {
        JSONObject jSONObject = new JSONObject();
        buildBaseInfo(jSONObject);
        if (this.columnProfiling.dataType == ColumnProfiling.STRING) {
            jSONObject.put("string_length", stringLengthsToJson());
        }
        if (this.columnProfiling.dataType == ColumnProfiling.BIGINT) {
            jSONObject.put("statistic", toJson(this.columnProfiling.statisticLong));
        } else if (this.columnProfiling.dataType == ColumnProfiling.DOUBLE) {
            jSONObject.put("statistic", toJson(this.columnProfiling.statisticDouble));
        }
        jSONObject.put("top10_frequent_item", buildTop10FrequentItem());
        if (this.columnProfiling.tdigest.size() > 1) {
            jSONObject.put("quantiles", quantilesToJson());
        }
        jSONObject.put("histograms", buildHistograms());
        if (z) {
            repair(jSONObject);
        }
        return jSONObject.toString();
    }

    /** Lists every string length with a positive counter as {@code {"<length>": count}}. */
    private JSONArray stringLengthsToJson() {
        JSONArray lengths = new JSONArray();
        long[] counters = this.columnProfiling.stringLengths;
        for (int len = 0; len < counters.length; len++) {
            if (counters[len] > 0) {
                JSONObject entry = new JSONObject();
                entry.put(String.valueOf(len), Long.valueOf(counters[len]));
                lengths.add(entry);
            }
        }
        return lengths;
    }

    /** Samples the digest at fixed probabilities and reports each quantile rounded to four decimals. */
    private JSONArray quantilesToJson() {
        final double[] probabilities =
                {0.01d, 0.05d, 0.1d, 0.2d, 0.3d, 0.4d, 0.5d, 0.6d, 0.7d, 0.8d, 0.9d, 0.95d, 0.99d};
        JSONArray quantiles = new JSONArray();
        for (double probability : probabilities) {
            // new BigDecimal(double) is kept on purpose: switching to valueOf could change
            // how borderline values round. HALF_UP is the named form of the legacy int mode 4.
            double rounded = new BigDecimal(this.columnProfiling.tdigest.quantile(probability))
                    .setScale(4, RoundingMode.HALF_UP)
                    .doubleValue();
            JSONObject entry = new JSONObject();
            entry.put(String.valueOf(probability), Double.valueOf(rounded));
            quantiles.add(entry);
        }
        return quantiles;
    }

    /**
     * Adjusts an already built profile document in place.
     *
     * <p>{@code unique_key_cnt} is capped at the number of calculated
     * records. In addition, when the record count exceeds
     * {@code ColumnProfiling.MIN_RECORD_CNT_CHANGE_PROFILING_METHOD} and the
     * cardinality / record-count ratio exceeds
     * {@code MIN_UNIQUE_KEY_RATE_REMOVE_TOP10_FREQUENT_ITEM}, the count of
     * every entry in {@code top10_frequent_item} is overwritten with 1.
     *
     * <p>A missing {@code top10_frequent_item} array and null or empty
     * entries inside it are skipped instead of raising an exception.
     *
     * @param jSONObject the profile document to modify
     */
    public void repair(JSONObject jSONObject) {
        long cardinality = this.columnProfiling.cardinalityEstimation.cardinality();
        jSONObject.put("unique_key_cnt", Long.valueOf(Math.min(cardinality, this.columnProfiling.calcRecordCnt)));

        boolean tooFewRecords =
                this.columnProfiling.calcRecordCnt <= ColumnProfiling.MIN_RECORD_CNT_CHANGE_PROFILING_METHOD;
        if (tooFewRecords
                || (cardinality * 1.0d) / this.columnProfiling.calcRecordCnt
                        <= MIN_UNIQUE_KEY_RATE_REMOVE_TOP10_FREQUENT_ITEM) {
            return;
        }

        JSONArray items = jSONObject.getJSONArray("top10_frequent_item");
        if (items == null) {
            // The document was not produced by build(), or the key was removed.
            return;
        }
        for (int i = 0; i < items.size(); i++) {
            JSONObject item = items.getJSONObject(i);
            if (item == null || item.isEmpty()) {
                continue;
            }
            // Each entry is expected to hold a single key; replace its count with 1.
            item.put(item.keySet().iterator().next(), Integer.valueOf(1));
        }
    }

    /**
     * Writes the type name and the record counters of the column into the
     * given document. For STRING columns it also writes {@code min_length}
     * and {@code max_length}; the minimum is reported as 0 as soon as any
     * null or blank record was seen.
     *
     * @param jSONObject document that receives the entries
     */
    public void buildBaseInfo(JSONObject jSONObject) {
        final ColumnProfiling profile = this.columnProfiling;

        String typeName;
        if (profile.dataType == ColumnProfiling.STRING) {
            typeName = "string";
        } else if (profile.dataType == ColumnProfiling.BIGINT) {
            typeName = "bigint";
        } else if (profile.dataType == ColumnProfiling.DOUBLE) {
            typeName = "double";
        } else {
            typeName = "unknow";
        }

        jSONObject.put("data_type", typeName);
        jSONObject.put("total_record_cnt", Long.valueOf(profile.totalRecordCnt));
        jSONObject.put("calc_record_cnt", Long.valueOf(profile.calcRecordCnt));
        jSONObject.put("null_record_cnt", Long.valueOf(profile.nullRecordCnt));
        jSONObject.put("blank_record_cnt", Long.valueOf(profile.blankRecordCnt));
        jSONObject.put("unique_key_cnt", Long.valueOf(profile.cardinalityEstimation.cardinality()));

        if (profile.dataType != ColumnProfiling.STRING) {
            return;
        }
        boolean hasNullOrBlank = profile.nullRecordCnt + profile.blankRecordCnt > 0;
        int minLength = hasNullOrBlank ? 0 : profile.stringMinLength;
        jSONObject.put("min_length", Integer.valueOf(minLength));
        jSONObject.put("max_length", Integer.valueOf(profile.stringMaxLength));
    }

    /**
     * Builds the {@code histograms} array of the profile.
     *
     * <ul>
     *   <li>No calculated records: a single placeholder entry {@code {"0|0": "1"}}.</li>
     *   <li>STRING column: histogram over the string lengths.</li>
     *   <li>Estimated cardinality below {@code HISTOGRAMS_CNT}: one
     *       {@code {"<key>|<key>": count}} entry per frequent item.</li>
     *   <li>Digest with at most one point: the placeholder entry.</li>
     *   <li>Otherwise: digest-based histogram bounded by the column min / max
     *       (0.0 for both when the type is neither BIGINT nor DOUBLE).</li>
     * </ul>
     *
     * @return the histogram as an array of single-entry JSON objects
     */
    public JSONArray buildHistograms() {
        final ColumnProfiling profile = this.columnProfiling;

        JSONObject placeholderEntry = new JSONObject();
        placeholderEntry.put("0" + HISTOGRAMS_SPLIT_CHAR + "0", "1");
        JSONArray placeholder = new JSONArray();
        placeholder.add(placeholderEntry);

        if (profile.calcRecordCnt <= 0) {
            return placeholder;
        }
        if (profile.dataType == ColumnProfiling.STRING) {
            return Histograms.getJsonArray(profile.stringLengths, profile.stringMaxLength);
        }
        if (profile.cardinalityEstimation.cardinality() < HISTOGRAMS_CNT) {
            JSONArray perValue = new JSONArray();
            for (FrequentItem.Node item : profile.frequentItem.getTopN()) {
                JSONObject entry = new JSONObject();
                entry.put(item.getKey() + HISTOGRAMS_SPLIT_CHAR + item.getKey(), Long.valueOf(item.getCnt()));
                perValue.add(entry);
            }
            return perValue;
        }
        if (profile.tdigest.size() <= 1) {
            return placeholder;
        }

        double lower = 0.0d;
        double upper = 0.0d;
        if (profile.dataType == ColumnProfiling.BIGINT) {
            lower = profile.statisticLong.min();
            upper = profile.statisticLong.max();
        } else if (profile.dataType == ColumnProfiling.DOUBLE) {
            lower = profile.statisticDouble.min();
            upper = profile.statisticDouble.max();
        }
        return Histograms.getJsonArray(profile.tdigest, lower, upper, profile.dataType);
    }

    /**
     * Builds the {@code top10_frequent_item} array.
     *
     * <p>When null records exist, a synthetic {@code "NULL"} item carrying
     * the null count is merged into the frequent items, the list is sorted
     * by count in descending order and cut to at most 10 entries. The merge
     * works on a copy, so the list returned by {@code getTopN()} is never
     * modified.
     *
     * @return one {@code {"<key>": count}} object per item
     */
    public JSONArray buildTop10FrequentItem() {
        List<FrequentItem.Node> topN = this.columnProfiling.frequentItem.getTopN();
        if (this.columnProfiling.nullRecordCnt > 0) {
            List<FrequentItem.Node> ranked = new ArrayList<>(topN);
            ranked.add(new FrequentItem.Node("NULL", this.columnProfiling.nullRecordCnt));
            Collections.sort(ranked, new Comparator<FrequentItem.Node>() {
                @Override
                public int compare(FrequentItem.Node left, FrequentItem.Node right) {
                    // Arguments are swapped to sort by count, highest first.
                    return Long.compare(right.getCnt(), left.getCnt());
                }
            });
            topN = ranked.size() > 10 ? ranked.subList(0, 10) : ranked;
        }

        JSONArray jSONArray = new JSONArray();
        for (FrequentItem.Node node : topN) {
            JSONObject jSONObject = new JSONObject();
            jSONObject.put(node.getKey(), Long.valueOf(node.getCnt()));
            jSONArray.add(jSONObject);
        }
        return jSONArray;
    }

    /**
     * Converts the statistics of a BIGINT column into a JSON object.
     *
     * @param statisticLong statistics to convert
     * @return object with the spread, mean, min / max and record counters
     */
    private static JSONObject toJson(StatisticLong statisticLong) {
        JSONObject stats = new JSONObject();

        // Spread and central tendency.
        stats.put("variance", Double.valueOf(statisticLong.variance()));
        stats.put("standard_deviation", Double.valueOf(statisticLong.standardDeviation()));
        stats.put("mean", Double.valueOf(statisticLong.mean()));

        // Extremes are reported as integers for this type.
        stats.put("min", Long.valueOf(statisticLong.min()));
        stats.put("max", Long.valueOf(statisticLong.max()));

        // Record counters.
        stats.put("negative_record_cnt", Long.valueOf(statisticLong.negativeRecordCnt()));
        stats.put("zero_record_cnt", Long.valueOf(statisticLong.zeroRecordCnt()));
        stats.put("record_cnt", Long.valueOf(statisticLong.recordCnt()));
        return stats;
    }

    /**
     * Converts the statistics of a DOUBLE column into a JSON object.
     *
     * @param statisticDouble statistics to convert
     * @return object with the spread, mean, min / max and record counters
     */
    private static JSONObject toJson(StatisticDouble statisticDouble) {
        JSONObject stats = new JSONObject();

        // Spread and central tendency.
        stats.put("variance", Double.valueOf(statisticDouble.variance()));
        stats.put("standard_deviation", Double.valueOf(statisticDouble.standardDeviation()));
        stats.put("mean", Double.valueOf(statisticDouble.mean()));

        // Extremes are reported as doubles for this type.
        stats.put("min", Double.valueOf(statisticDouble.min()));
        stats.put("max", Double.valueOf(statisticDouble.max()));

        // Record counters.
        stats.put("negative_record_cnt", Long.valueOf(statisticDouble.negativeRecordCnt()));
        stats.put("zero_record_cnt", Long.valueOf(statisticDouble.zeroRecordCnt()));
        stats.put("record_cnt", Long.valueOf(statisticDouble.recordCnt()));
        return stats;
    }
}
