package weka.knowledgeflow.steps;

import com.github.houbb.heaven.util.lang.BoolUtil;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.jboss.netty.handler.codec.http.multipart.DiskFileUpload;
import weka.core.Attribute;
import weka.core.Environment;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.OptionMetadata;
import weka.core.WekaException;
import weka.gui.FilePropertyMetadata;
import weka.gui.ProgrammaticProperty;
import weka.knowledgeflow.Data;
import weka.knowledgeflow.StepManager;

@KFStep(name = "Sorter", category = "Tools", toolTipText = "Sort instances in ascending or descending order according to the values of user-specified attributes. Instances can be sorted according to multiple attributes (defined in order). Handles datasets larger than can be fit into main memory via instance connections and specifying the in-memory buffer size. Implements a merge-sort by writing the sorted in-memory buffer to a file when full and then interleaving instances from the disk-based file(s) when the incoming stream has finished.", iconPath = "weka/gui/knowledgeflow/icons/Sorter.gif")
/* loaded from: input_file:WEB-INF/lib/weka-stable-3.8.5.jar:weka/knowledgeflow/steps/Sorter.class */
public class Sorter extends BaseStep {
    private static final long serialVersionUID = 3373283983192467264L;
    protected transient SortComparator m_sortComparator;
    protected transient List<InstanceHolder> m_incrementalBuffer;
    protected transient List<File> m_bufferFiles;
    protected Map<String, Integer> m_stringAttIndexes;
    protected String m_sortDetails;
    protected Instances m_connectedFormat;
    protected boolean m_isReset;
    protected boolean m_streaming;
    protected Data m_streamingData;
    protected String m_bufferSize = "10000";
    protected int m_bufferSizeI = 10000;
    protected File m_tempDirectory = new File("");

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:WEB-INF/lib/weka-stable-3.8.5.jar:weka/knowledgeflow/steps/Sorter$InstanceHolder.class */
    /**
     * Serializable carrier that pairs an instance with the bookkeeping the sorter
     * needs while buffering, spilling to disk and merging.
     */
    public static class InstanceHolder implements Serializable {
        private static final long serialVersionUID = -3985730394250172995L;
        /** The wrapped instance. */
        protected Instance m_instance;
        /**
         * Index of the temp file this holder was read from during the merge phase;
         * the streaming path sets it to -1 for holders taken from the in-memory buffer.
         */
        protected int m_fileNumber;
        /**
         * String attribute values keyed by attribute name. Filled in by the streaming
         * path when the data has string attributes; otherwise left null.
         */
        protected Map<String, String> m_stringVals;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:WEB-INF/lib/weka-stable-3.8.5.jar:weka/knowledgeflow/steps/Sorter$SortComparator.class */
    /**
     * Composite comparator that applies an ordered list of sort rules. The first
     * rule that distinguishes the two holders decides the outcome; holders that
     * tie on every rule compare as equal.
     */
    public static class SortComparator implements Comparator<InstanceHolder> {
        /** Rules in priority order (most significant first). */
        protected List<SortRule> m_sortRules;

        /**
         * @param list the sort rules to apply, most significant first
         */
        public SortComparator(List<SortRule> list) {
            m_sortRules = list;
        }

        /**
         * Compares two holders rule by rule.
         *
         * @return the result of the first rule that yields a non-zero value, or 0
         *         when all rules consider the holders equal
         */
        @Override
        public int compare(InstanceHolder instanceHolder, InstanceHolder instanceHolder2) {
            for (SortRule rule : m_sortRules) {
                int outcome = rule.compare(instanceHolder, instanceHolder2);
                if (outcome != 0) {
                    return outcome;
                }
            }
            return 0;
        }
    }

    /* loaded from: input_file:WEB-INF/lib/weka-stable-3.8.5.jar:weka/knowledgeflow/steps/Sorter$SortRule.class */
    /**
     * A single sort criterion: an attribute (given by name, by zero-based index, or
     * by the special tokens {@code /first} and {@code /last}) plus a direction.
     *
     * <p>Rules have an internal string form {@code <attribute>@@SR@@<Y|N>} where
     * {@code Y} means descending. {@link #init(Environment, Instances)} must be
     * called before {@link #compare(InstanceHolder, InstanceHolder)} so that the
     * attribute specification is resolved against the data.
     */
    public static class SortRule implements Comparator<InstanceHolder> {
        /** Separator between the attribute spec and the direction flag. */
        private static final String FIELD_SEPARATOR = "@@SR@@";
        /** Direction flag meaning "descending". */
        private static final String DESCENDING_FLAG = "Y";
        /** Direction flag meaning "ascending". */
        private static final String ASCENDING_FLAG = "N";

        /** Attribute name, index or special token exactly as configured. */
        protected String m_attributeNameOrIndex;
        /** The resolved attribute; null until {@code init} has been called. */
        protected Attribute m_attribute;
        /** True to sort in descending order. */
        protected boolean m_descending;

        /**
         * @param str attribute name, index or special token
         * @param z true for descending order
         */
        public SortRule(String str, boolean z) {
            m_attributeNameOrIndex = str;
            m_descending = z;
        }

        /** Creates an unconfigured rule; use the setters to configure it. */
        public SortRule() {
        }

        /**
         * Creates a rule from its internal string form.
         *
         * @param str the internal form, see {@link #toStringInternal()}
         * @throws IllegalArgumentException if {@code str} is malformed
         */
        public SortRule(String str) {
            parseFromInternal(str);
        }

        /**
         * Populates this rule from its internal string form.
         *
         * @param str text of the form {@code <attribute>@@SR@@<Y|N>}
         * @throws IllegalArgumentException if the text does not split into exactly two parts
         */
        protected void parseFromInternal(String str) {
            String[] parts = str.split(FIELD_SEPARATOR);
            if (parts.length != 2) {
                throw new IllegalArgumentException("Malformed sort rule: " + str);
            }
            m_attributeNameOrIndex = parts[0].trim();
            // Trim the flag as well so that stray whitespace does not silently
            // turn a descending rule into an ascending one.
            m_descending = parts[1].trim().equalsIgnoreCase(DESCENDING_FLAG);
        }

        /**
         * @return the internal string form, parseable by {@link #parseFromInternal(String)}
         */
        public String toStringInternal() {
            return m_attributeNameOrIndex + FIELD_SEPARATOR + (m_descending ? DESCENDING_FLAG : ASCENDING_FLAG);
        }

        /**
         * @return a human-readable description of this rule
         */
        @Override
        public String toString() {
            return "Attribute: " + m_attributeNameOrIndex + " - sort " + (m_descending ? "descending" : "ascending");
        }

        public void setAttribute(String str) {
            m_attributeNameOrIndex = str;
        }

        public String getAttribute() {
            return m_attributeNameOrIndex;
        }

        public void setDescending(boolean z) {
            m_descending = z;
        }

        public boolean getDescending() {
            return m_descending;
        }

        /**
         * Resolves the configured attribute specification against the data.
         *
         * <p>Resolution order: {@code /first}, {@code /last}, attribute name, then
         * the specification parsed as an index passed directly to
         * {@code Instances.attribute(int)}.
         *
         * @param environment used to substitute variables in the specification
         * @param instances header of the data that will be sorted
         * @throws IllegalArgumentException if the specification matches neither an
         *         attribute name nor a valid attribute index
         */
        public void init(Environment environment, Instances instances) {
            String attSpec = m_attributeNameOrIndex;
            try {
                attSpec = environment.substitute(attSpec);
            } catch (Exception ignored) {
                // Deliberate best effort: if substitution fails, fall back to the
                // specification exactly as configured.
            }

            if (attSpec.equalsIgnoreCase("/first")) {
                m_attribute = instances.attribute(0);
                return;
            }
            if (attSpec.equalsIgnoreCase("/last")) {
                m_attribute = instances.attribute(instances.numAttributes() - 1);
                return;
            }

            m_attribute = instances.attribute(attSpec);
            if (m_attribute != null) {
                return;
            }

            int index;
            try {
                index = Integer.parseInt(attSpec);
            } catch (NumberFormatException e) {
                throw new IllegalArgumentException("Unable to locate attribute " + attSpec
                    + " as either a named attribute or as a valid attribute index", e);
            }
            // Report an out-of-range index with context rather than letting a raw
            // index error surface from the attribute lookup.
            if (index < 0 || index >= instances.numAttributes()) {
                throw new IllegalArgumentException("Unable to locate attribute " + attSpec
                    + ": index is out of range for data with " + instances.numAttributes() + " attributes");
            }
            m_attribute = instances.attribute(index);
        }

        /**
         * Compares two holders on this rule's attribute.
         *
         * <p>Missing values sort after present values regardless of direction; two
         * missing values are equal. Non-string attributes are compared on their
         * internal double value, string attributes lexicographically.
         *
         * @throws IllegalArgumentException if the attribute is relation-valued
         */
        @Override
        public int compare(InstanceHolder instanceHolder, InstanceHolder instanceHolder2) {
            boolean leftMissing = instanceHolder.m_instance.isMissing(m_attribute);
            boolean rightMissing = instanceHolder2.m_instance.isMissing(m_attribute);
            if (leftMissing && rightMissing) {
                return 0;
            }
            if (leftMissing) {
                return 1;
            }
            if (rightMissing) {
                return -1;
            }

            int result;
            if (m_attribute.isString()) {
                result = stringValueOf(instanceHolder).compareTo(stringValueOf(instanceHolder2));
            } else if (m_attribute.isRelationValued()) {
                throw new IllegalArgumentException("Can't sort according to relation-valued attribute values!");
            } else {
                result = Double.compare(instanceHolder.m_instance.value(m_attribute),
                    instanceHolder2.m_instance.value(m_attribute));
            }
            return m_descending ? -result : result;
        }

        /**
         * Returns the holder's value for this rule's string attribute. Prefers the
         * value cached in the holder; holders without a cached value (for example
         * those built without populating {@code m_stringVals}) fall back to reading
         * the value from the instance itself instead of failing with a
         * NullPointerException.
         */
        private String stringValueOf(InstanceHolder holder) {
            String cached = holder.m_stringVals == null ? null : holder.m_stringVals.get(m_attribute.name());
            return cached != null ? cached : holder.m_instance.stringValue(m_attribute);
        }
    }

    /**
     * @return the configured in-memory buffer size, as the unparsed string that
     *         was set (it may contain environment variables)
     */
    public String getBufferSize() {
        return m_bufferSize;
    }

    /**
     * Sets the number of instances to hold in memory before spilling to a temp
     * file. The value is stored as given and only parsed when streaming starts.
     *
     * @param str the buffer size
     */
    @OptionMetadata(displayName = "Size of in-mem streaming buffer", description = "Number of instances to sort in memory before writing to a temp file (instance connections only)", displayOrder = 1)
    public void setBufferSize(String str) {
        m_bufferSize = str;
    }

    /**
     * Sets the directory in which temp files are created when the buffer is
     * spilled to disk.
     *
     * @param file the directory to use
     */
    @FilePropertyMetadata(fileChooserDialogType = 0, directoriesOnly = true)
    @OptionMetadata(displayName = "Directory for temp files", description = "Where to store temporary files when spilling to disk", displayOrder = 2)
    public void setTempDirectory(File file) {
        m_tempDirectory = file;
    }

    /**
     * @return the directory configured for temp files
     */
    public File getTempDirectory() {
        return m_tempDirectory;
    }

    /**
     * Sets the sort rules in their internal string form: individual rules joined
     * by {@code @@sort-rule@@}.
     *
     * @param str the encoded sort rules
     */
    @ProgrammaticProperty
    public void setSortDetails(String str) {
        m_sortDetails = str;
    }

    /**
     * @return the sort rules in their internal string form
     */
    public String getSortDetails() {
        return m_sortDetails;
    }

    /**
     * Resets the per-run state of the step and checks that sort rules have been
     * configured.
     *
     * @throws WekaException if no sort rules have been defined
     */
    @Override
    public void stepInit() throws WekaException {
        m_isReset = true;
        m_streaming = false;
        m_stringAttIndexes = new HashMap<String, Integer>();
        m_bufferFiles = new ArrayList<File>();
        m_streamingData = new Data("instance");

        boolean rulesDefined = m_sortDetails != null && m_sortDetails.length() != 0;
        if (!rulesDefined) {
            throw new WekaException("Need at least one rule defined by which to sort the data!");
        }
    }

    /**
     * Lists the connection types this step can accept right now. Only a single
     * incoming connection is supported.
     *
     * @return the four acceptable connection types when nothing is connected yet,
     *         otherwise {@code null}
     */
    @Override
    public List<String> getIncomingConnectionTypes() {
        if (getStepManager().numIncomingConnections() != 0) {
            return null;
        }
        return Arrays.asList("instance", StepManager.CON_DATASET, StepManager.CON_TRAININGSET, StepManager.CON_TESTSET);
    }

    /**
     * Lists the connection types this step can produce: each supported type is
     * offered only if there is an incoming connection of that same type.
     *
     * @return the producible connection types, possibly empty
     */
    @Override
    public List<String> getOutgoingConnectionTypes() {
        String[] passThroughTypes = {
            "instance", StepManager.CON_DATASET, StepManager.CON_TRAININGSET, StepManager.CON_TESTSET
        };
        List<String> outgoing = new ArrayList<String>();
        for (String type : passThroughTypes) {
            if (getStepManager().numIncomingConnectionsOfType(type) > 0) {
                outgoing.add(type);
            }
        }
        return outgoing;
    }

    /**
     * Prepares the step for sorting data with the given structure: remembers the
     * header, builds the comparator from the configured sort rules, records which
     * attributes are string attributes and, when streaming, parses the buffer
     * size and allocates the in-memory buffer.
     *
     * @param instances header of the incoming data
     * @throws IllegalArgumentException if a sort rule is malformed or cannot be
     *         resolved, or if the buffer size is not a non-negative integer
     */
    protected void init(Instances instances) {
        m_connectedFormat = instances;

        if (m_sortDetails != null && m_sortDetails.length() > 0) {
            List<SortRule> rules = new ArrayList<SortRule>();
            for (String ruleSpec : m_sortDetails.split("@@sort-rule@@")) {
                SortRule rule = new SortRule(ruleSpec.trim());
                rule.init(getStepManager().getExecutionEnvironment().getEnvironmentVariables(), instances);
                rules.add(rule);
            }
            m_sortComparator = new SortComparator(rules);
        }

        // Map string attribute names to their index; null signals "no string
        // attributes" to the rest of the step.
        Map<String, Integer> stringAtts = new HashMap<String, Integer>();
        for (int i = 0; i < instances.numAttributes(); i++) {
            if (instances.attribute(i).isString()) {
                stringAtts.put(instances.attribute(i).name(), Integer.valueOf(i));
            }
        }
        m_stringAttIndexes = stringAtts.isEmpty() ? null : stringAtts;

        if (m_streaming) {
            m_bufferSizeI = parseBufferSize(environmentSubstitute(m_bufferSize));
            m_incrementalBuffer = new ArrayList<InstanceHolder>(m_bufferSizeI);
            getStepManager().logBasic("Starting streaming sort. Using streaming buffer size: " + m_bufferSizeI);
        }
    }

    /**
     * Parses the configured buffer size, tolerating surrounding whitespace.
     *
     * @param value the buffer size after environment substitution
     * @return the parsed, non-negative size
     * @throws NumberFormatException if the value is not an integer
     * @throws IllegalArgumentException if the value is negative
     */
    private static int parseBufferSize(String value) {
        String trimmed = value == null ? "" : value.trim();
        int size;
        try {
            size = Integer.parseInt(trimmed);
        } catch (NumberFormatException ex) {
            // Same exception type as before, but say which setting is wrong.
            NumberFormatException detailed =
                new NumberFormatException("Streaming buffer size must be an integer but was '" + value + "'");
            detailed.initCause(ex);
            throw detailed;
        }
        if (size < 0) {
            throw new IllegalArgumentException("Streaming buffer size must not be negative: " + size);
        }
        return size;
    }

    /**
     * Entry point for incoming data. On the first data object after a reset the
     * step is initialised from the data's structure; the data is then routed to
     * the streaming or the batch path depending on the connection type.
     *
     * <p>The reset flag is only cleared for "instance" connections, so batch
     * data re-initialises the step for every data set received.
     *
     * @param data the incoming data
     * @throws WekaException if processing fails
     */
    @Override
    public void processIncoming(Data data) throws WekaException {
        if (m_isReset) {
            final Instances header;
            if (data.getConnectionName().equals("instance")) {
                Instance first = (Instance) data.getPrimaryPayload();
                header = new Instances(first.dataset(), 0);
                m_streaming = true;
                m_isReset = false;
            } else {
                header = new Instances((Instances) data.getPrimaryPayload(), 0);
            }
            init(header);
        }

        if (m_streaming) {
            processIncremental(data);
        } else {
            processBatch(data);
        }

        if (isStopRequested()) {
            getStepManager().interrupted();
            return;
        }
        // In streaming mode completion is signalled when the stream ends, not here.
        if (!m_streaming) {
            getStepManager().finished();
        }
    }

    /**
     * Sorts a complete data set in memory and forwards the sorted copy on the
     * same connection type, carrying over the set number and max set number.
     *
     * @param data the data holding the data set to sort
     * @throws WekaException if the sorted data cannot be output
     */
    protected void processBatch(Data data) throws WekaException {
        getStepManager().processing();
        Instances input = (Instances) data.getPrimaryPayload();
        getStepManager().logBasic("Sorting " + input.relationName());

        int total = input.numInstances();
        List<InstanceHolder> holders = new ArrayList<InstanceHolder>();
        for (int row = 0; row < total; row++) {
            InstanceHolder holder = new InstanceHolder();
            holder.m_instance = input.instance(row);
            holders.add(holder);
        }
        Collections.sort(holders, m_sortComparator);

        Instances sorted = new Instances(input, 0);
        for (InstanceHolder holder : holders) {
            sorted.add(holder.m_instance);
        }

        Data output = new Data(data.getConnectionName(), sorted);
        output.setPayloadElement(StepManager.CON_AUX_DATA_SET_NUM,
            data.getPayloadElement(StepManager.CON_AUX_DATA_SET_NUM));
        output.setPayloadElement(StepManager.CON_AUX_DATA_MAX_SET_NUM,
            data.getPayloadElement(StepManager.CON_AUX_DATA_MAX_SET_NUM));
        getStepManager().outputData(output);
    }

    /**
     * Handles one object of an instance stream. A regular instance is added to
     * the in-memory buffer, which is sorted and written to a temp file as soon as
     * it reaches the configured size. The end-of-stream marker triggers emission
     * of everything buffered so far.
     *
     * @param data the streamed data object
     * @throws WekaException if sorting, spilling or emitting fails
     */
    protected void processIncremental(Data data) throws WekaException {
        if (isStopRequested()) {
            return;
        }
        if (getStepManager().isStreamFinished(data)) {
            emitBufferedInstances();
            return;
        }

        getStepManager().throughputUpdateStart();

        InstanceHolder holder = new InstanceHolder();
        holder.m_instance = (Instance) data.getPrimaryPayload();
        holder.m_fileNumber = -1; // -1 marks "came from the in-memory buffer"
        if (m_stringAttIndexes != null) {
            copyStringAttVals(holder);
        }
        m_incrementalBuffer.add(holder);

        boolean bufferFull = m_incrementalBuffer.size() == m_bufferSizeI;
        if (bufferFull) {
            try {
                sortBuffer(true);
            } catch (Exception ex) {
                throw new WekaException(ex);
            }
        }

        getStepManager().throughputUpdateEnd();
    }

    /**
     * Emits all buffered instances in sorted order once the incoming stream has
     * finished. If nothing was spilled to disk the sorted in-memory buffer is
     * streamed out directly; otherwise the in-memory buffer and all temp files
     * are merged, always emitting the smallest pending instance next.
     *
     * <p>All temp-file streams opened for the merge are closed on every exit
     * path (normal completion, stop request or failure).
     *
     * @throws WekaException if sorting, reading a temp file or outputting fails
     */
    protected void emitBufferedInstances() throws WekaException {
        if (isStopRequested()) {
            return;
        }

        if (m_incrementalBuffer.size() > 0) {
            try {
                getStepManager().throughputUpdateStart();
                sortBuffer(false);
                getStepManager().throughputUpdateEnd();
                if (m_bufferFiles.size() == 0) {
                    emitInMemoryBuffer();
                    return;
                }
            } catch (Exception e) {
                throw new WekaException(e);
            }
        }

        List<ObjectInputStream> runStreams = new ArrayList<ObjectInputStream>();
        List<InstanceHolder> heads = new ArrayList<InstanceHolder>();
        Instances outputHeader = new Instances(m_connectedFormat, 0);
        if (m_incrementalBuffer.size() > 0) {
            heads.add(m_incrementalBuffer.remove(0));
        }
        if (isStopRequested()) {
            return;
        }
        if (m_bufferFiles.size() > 0) {
            getStepManager().logDetailed("Merging temp files");
        }

        try {
            openSpilledRuns(runStreams, heads);
            mergeSpilledRuns(runStreams, heads, outputHeader);
        } finally {
            // Exhausted runs have already been closed and removed; whatever is
            // left here was abandoned because of a stop request or an error.
            for (ObjectInputStream remaining : runStreams) {
                closeQuietly(remaining);
            }
        }
    }

    /**
     * Streams out the (already sorted) in-memory buffer and signals the end of
     * the stream after the last instance.
     */
    private void emitInMemoryBuffer() throws WekaException {
        getStepManager().logDetailed("Emitting in memory buffer");
        Instances header = new Instances(m_incrementalBuffer.get(0).m_instance.dataset(), 0);
        int last = m_incrementalBuffer.size() - 1;
        for (int i = 0; i <= last; i++) {
            getStepManager().throughputUpdateStart();
            InstanceHolder holder = m_incrementalBuffer.get(i);
            holder.m_instance.setDataset(header);
            if (m_stringAttIndexes != null) {
                // NOTE(review): this path resets the value index when the header
                // already holds more than 0 values, the merge path when it holds
                // more than 1. Preserved as found - confirm which is intended.
                reattachStringValues(holder, header, 0);
            }
            if (isStopRequested()) {
                return;
            }
            m_streamingData.setPayloadElement("instance", holder.m_instance);
            getStepManager().throughputUpdateEnd();
            getStepManager().outputData(m_streamingData);
            if (i == last) {
                m_streamingData.clearPayload();
                getStepManager().throughputFinished(m_streamingData);
            }
        }
    }

    /**
     * Opens every temp file and reads its first holder into {@code heads}. The
     * position of a stream in {@code runStreams} always equals the position of
     * its file in {@code m_bufferFiles} and the holder's {@code m_fileNumber}.
     */
    private void openSpilledRuns(List<ObjectInputStream> runStreams, List<InstanceHolder> heads)
        throws WekaException {
        int fileIndex = 0;
        while (fileIndex < m_bufferFiles.size()) {
            FileInputStream raw = null;
            ObjectInputStream in = null;
            try {
                raw = new FileInputStream(m_bufferFiles.get(fileIndex));
                in = new ObjectInputStream(new BufferedInputStream(raw, 50000));
                InstanceHolder first = (InstanceHolder) in.readObject();
                if (first == null) {
                    // Nothing usable in this file: drop it so that stream and
                    // file indexes stay aligned.
                    in.close();
                    dropSpillFile(fileIndex);
                    continue;
                }
                runStreams.add(in);
                first.m_fileNumber = fileIndex;
                heads.add(first);
                fileIndex++;
            } catch (Exception e) {
                closeQuietly(in != null ? in : raw);
                throw new WekaException(e);
            }
        }
    }

    /**
     * Repeatedly emits the smallest pending holder and replaces it with the next
     * holder from the same source (temp file or in-memory buffer) until all
     * sources are exhausted or a stop is requested.
     */
    private void mergeSpilledRuns(List<ObjectInputStream> runStreams, List<InstanceHolder> heads,
        Instances outputHeader) throws WekaException {
        Collections.sort(heads, m_sortComparator);
        int emitted = 0;
        while (!heads.isEmpty() && !isStopRequested()) {
            InstanceHolder smallest = heads.remove(0);
            smallest.m_instance.setDataset(outputHeader);
            if (m_stringAttIndexes != null) {
                reattachStringValues(smallest, outputHeader, 1);
            }
            m_streamingData.setPayloadElement("instance", smallest.m_instance);
            emitted++;
            getStepManager().outputData(m_streamingData);
            getStepManager().throughputUpdateStart();
            // Guard against a configured buffer size of 0.
            if (m_bufferSizeI > 0 && emitted % m_bufferSizeI == 0) {
                getStepManager().logDetailed("Merged " + emitted + " instances");
            }

            InstanceHolder next = null;
            int source = smallest.m_fileNumber;
            if (source != -1) {
                next = readNextHolder(runStreams.get(source));
                if (next != null) {
                    next.m_fileNumber = source;
                } else {
                    retireRun(runStreams, heads, source);
                }
            } else if (m_incrementalBuffer.size() > 0) {
                next = m_incrementalBuffer.remove(0);
                next.m_fileNumber = -1;
            }

            if (next != null) {
                int pos = Collections.binarySearch(heads, next, m_sortComparator);
                heads.add(pos < 0 ? -pos - 1 : pos, next);
            }
            getStepManager().throughputUpdateEnd();
        }

        if (!isStopRequested()) {
            m_streamingData.clearPayload();
            getStepManager().throughputFinished(m_streamingData);
        }
    }

    /**
     * Reads the next holder from a temp file.
     *
     * @return the holder, or {@code null} when the file is exhausted
     * @throws WekaException if the file cannot be read for any reason other than
     *         reaching its end
     */
    private InstanceHolder readNextHolder(ObjectInputStream in) throws WekaException {
        try {
            return (InstanceHolder) in.readObject();
        } catch (java.io.EOFException endOfRun) {
            // Normal termination: the run has no more holders.
            return null;
        } catch (Exception ex) {
            throw new WekaException(ex);
        }
    }

    /**
     * Closes and forgets an exhausted temp file and shifts the file numbers of
     * the pending holders that referred to later files.
     */
    private void retireRun(List<ObjectInputStream> runStreams, List<InstanceHolder> heads, int runIndex)
        throws WekaException {
        getStepManager().logDetailed("Closing temp file");
        try {
            runStreams.get(runIndex).close();
        } catch (java.io.IOException ex) {
            throw new WekaException(ex);
        }
        runStreams.remove(runIndex);
        dropSpillFile(runIndex);
        for (InstanceHolder pending : heads) {
            if (pending.m_fileNumber > runIndex) {
                pending.m_fileNumber--;
            }
        }
    }

    /** Removes a temp file from the bookkeeping list and deletes it (best effort). */
    private void dropSpillFile(int index) {
        File spent = m_bufferFiles.remove(index);
        if (!spent.delete()) {
            getStepManager().logDetailed("Unable to delete temp file " + spent);
        }
    }

    /**
     * Stores the holder's cached string values in the given header's string
     * attributes. If an attribute already held more than {@code threshold}
     * values beforehand, the instance's value for it is set to 0.
     */
    private void reattachStringValues(InstanceHolder holder, Instances header, int threshold) {
        for (String attName : m_stringAttIndexes.keySet()) {
            Attribute att = header.attribute(attName);
            boolean resetValueIndex = att.numValues() > threshold;
            att.setStringValue(holder.m_stringVals.get(attName));
            if (resetValueIndex) {
                holder.m_instance.setValue(att, 0.0d);
            }
        }
    }

    /** Closes a resource, ignoring failures; used only on cleanup paths. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (java.io.IOException ignored) {
            // Best-effort cleanup: nothing useful can be done about it here.
        }
    }

    /**
     * Sorts the in-memory buffer and, if requested, writes it to a new temp file
     * and clears the buffer.
     *
     * @param z true to spill the sorted buffer to a temp file; false to only sort
     * @throws Exception if the temp file cannot be created or written
     */
    private void sortBuffer(boolean z) throws Exception {
        getStepManager().logBasic("Sorting in memory buffer");
        Collections.sort(m_incrementalBuffer, m_sortComparator);
        if (!z || isStopRequested()) {
            return;
        }

        File spillFile = createSpillFile();
        getStepManager().logDebug("Temp file: " + spillFile.toString());
        m_bufferFiles.add(spillFile);

        // Reset the object stream roughly ten times per buffer so it does not
        // keep a reference to every object written. Never let the interval drop
        // to 0, which would cause a division by zero for buffer sizes below 10.
        int resetInterval = Math.max(1, m_bufferSizeI / 10);

        FileOutputStream fileOut = new FileOutputStream(spillFile);
        try {
            ObjectOutputStream objectOut = new ObjectOutputStream(new BufferedOutputStream(fileOut, 50000));
            getStepManager().logDetailed("Writing buffer to temp file " + m_bufferFiles.size()
                + ". Buffer contains " + m_incrementalBuffer.size() + " instances");
            for (int i = 0; i < m_incrementalBuffer.size(); i++) {
                InstanceHolder holder = m_incrementalBuffer.get(i);
                // Detach the header so it is not serialized with every instance.
                holder.m_instance.setDataset(null);
                objectOut.writeObject(holder);
                if (i % resetInterval == 0) {
                    objectOut.reset();
                }
            }
            objectOut.close(); // flushes the buffered data and closes the file
        } finally {
            // Releases the file handle if writing failed part-way; has no
            // effect when the stream has already been closed above.
            fileOut.close();
        }
        m_incrementalBuffer.clear();
    }

    /**
     * Creates an empty temp file for a spilled buffer. The file is created in the
     * configured temp directory if that is an existing, writable directory, and
     * in the default temp directory otherwise. The file is registered for
     * deletion on JVM exit in both cases.
     */
    private File createSpillFile() throws java.io.IOException {
        File spillFile = null;
        String configured = m_tempDirectory == null ? "" : m_tempDirectory.toString();
        if (configured.length() > 0) {
            File directory = new File(environmentSubstitute(configured));
            if (directory.isDirectory() && directory.canWrite()) {
                spillFile = File.createTempFile("Sorter", ".tmp", directory);
            }
        }
        if (spillFile == null) {
            spillFile = File.createTempFile("Sorter", ".tmp");
        }
        spillFile.deleteOnExit();
        return spillFile;
    }

    /**
     * Caches the instance's string attribute values in the holder, keyed by
     * attribute name. The values are read via the instance's current data set,
     * so the instance must still be attached to one.
     *
     * @param instanceHolder the holder whose instance values are to be cached
     */
    private void copyStringAttVals(InstanceHolder instanceHolder) {
        Instance inst = instanceHolder.m_instance;
        for (String attName : m_stringAttIndexes.keySet()) {
            String value = inst.stringValue(inst.dataset().attribute(attName));
            if (instanceHolder.m_stringVals == null) {
                instanceHolder.m_stringVals = new HashMap<String, String>();
            }
            instanceHolder.m_stringVals.put(attName, value);
        }
    }

    /**
     * Names the custom editor dialog for this step.
     *
     * @return the fully qualified class name of the editor dialog
     */
    @Override // weka.knowledgeflow.steps.BaseStep, weka.knowledgeflow.steps.Step
    public String getCustomEditorForStep() {
        return "weka.gui.knowledgeflow.steps.SorterStepEditorDialog";
    }
}
