Register Now

Login

Lost Password

Lost your password? Please enter your email address. You will receive a link and will create a new password via email.

Login

Register Now

Welcome to All Test Answers

A-Priori and PCY algorithms implementation using java – Mining Frequent Itemsets

The main objective of this project is to find frequent itemsets by implementing two efficient algorithms: A-Priori and PCY. The goal is to find frequent pairs of elements. You do not need to find triples and larger itemsets.

Dataset

The retail dataset contains anonymized retail market basket data (88K baskets) from an anonymous retail store. The preprocessing step to map text labels into integers has already been done. Use Sublime Text, TextPad or Notepad++ or other software to open the file. Do not use Notepad.

Experiments

Perform the scalability study for finding frequent pairs of elements by dividing the dataset into different chunks and measure the time performance. Provide the line chart. Provide results for the following support thresholds: 1%, 5%, 10%. For example, if your chuck is 10% of the dataset, you have around 8,800 baskets. Therefore, if your support threshold is 5%, you should count the pairs that appear in at least 440 baskets. See three samples below for three different support thresholds. Note, the sample charts contain hypothetical numbers!

Answer:

Tester Class

import java.io.*;
public class Tester {

    public static void main(String[] args) throws IOException {

        long first,last,time_Diff;
        int[] percent = {1, 5, 10};

        for (int per : percent) {

            for (int i = 1; i <= 12; i++) {
				
                String retailInput = "retail" + i + ".txt";
                int size = buckcount(retailInput);
                System.out.println("\nInput file "+i +": "+retailInput);
                System.out.println("Bucket " + size + " read in at " + per + "% support");
				
				int threshold = calc(size, per);
                first = System.currentTimeMillis();
                APriori.setThreshold(threshold);
                APriori.toFindPair(retailInput);
				last = System.currentTimeMillis();
                time_Diff = last - first;
                System.out.println("APriori: " + time_Diff + "ms.");
				
				first = System.currentTimeMillis();
                PCY.setLimit(threshold);
                PCY.toFindPair(retailInput);     
				last = System.currentTimeMillis();
                time_Diff = last - first;
				System.out.println("PCY: " + time_Diff + "ms.");
				
				System.out.println("Results: " );
				System.out.println(APriori.toFindPair(retailInput));                                       
            }
        }
    }
	/*method to count the buckets */
    public static int buckcount(String retail) throws IOException {
        InputStream input = new BufferedInputStream(new FileInputStream(retail));
        byte[] i = new byte[1024];
        int readChars,n = 0;
        boolean empty = true;
        while ((readChars = input.read(i)) != -1) {
            empty = false;
            for (int k = 0; k < readChars; ++k) {
            	//new line
                if (i[k] == '\n') {
                    ++n;
                }
            }
        }
        //Check if the line is not empty return 1 otherwise return n
        return (n == 0 && !empty) ? 1 : n;
    }
    /*calculate the amount of support for the required percentage*/ 
    public static int calc(int sum, double per) {
        if (per < 0) {
             return 0;
        } else if (per > 100) {    
			return sum;
        } else {
        	//cast to integer
            return (int) Math.round((per / 100) * sum);
        }
    }

}

Start Class

public class Start {

    final Set1 key;
    Integer value;    

    public Start(Set1 key, Integer value) {
        this.key = key;
        this.value = value;
    }

    public Integer getValue() {
        return value;
    }
	
	public Set1 getKey() {
        return key;
    }
	
    public void setValue(Integer value) {
        this.value = value;
    }

    public String toString() {

        return "[" + key + "=>" + value + "]";
    }
}

Set1 Class


import java.util.*;
public class Set1<T1, T2> {

    //Variables
	public T1 o1,o2;

    //Constructor
	public Set1(T1 o1, T1 o2) {
        this.o1 = o1;
        this.o2 = o2;
    }
	
	//Setter
    public void setPair(T1 o1, T1 o2) {
        this.o1 = o1;
        this.o2 = o2;
    }

    public boolean equals(Object o) {
    	if (this == o) {
            return true;
        }
    	if (!(o instanceof Set1)) {
            return false;
        }
		
        return (o1 == ((Set1) o).o1) && (o2 == ((Set1) o).o2);
    }

    /*This hash function which is used for 2 numbers of type integer*/
    public int hashCode() {
        int hashVar = 321;
        hashVar = 20 * hashVar + Objects.hashCode(this.o1);
        hashVar = 50 * hashVar + Objects.hashCode(this.o2);
        return hashVar;
    }

    public String toString() {
        return "(" + o1 + ", " + o2 + ")";
    }

}

Set2 Class

import java.util.*;
/*This class is used to override the hash code used by java*/
class Set2 <T1, T2> extends Set1 <T1, T2> {

    public Set2(T1 o1, T1 o2) {
        super(o1, o2);
    }

    @Override
    public int hashCode() {
        int hashVar = 20;
        hashVar = 150 * hashVar + Objects.hashCode(this.o1);
        hashVar = 50 * hashVar + Objects.hashCode(this.o2);
        return hashVar;
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof Set1)) {
            return false;
        }
		if (this == o) {
            return true;
        }
        return (o1 == ((Set1) o).o1) && (o2 == ((Set1) o).o2);
    }

}

Bucket Class


import java.util.*;
/*This class is used to hold entries*/
public class Bucket {

	//Variables
    private Start list[];
	private int maxSize = 131072;
	
	//Constructor
	public Bucket(int num) {
        maxSize = num;
        list = new Start[maxSize];
    }
	
    public Bucket() {
        list = new Start[maxSize];
    }
	
    //Getters
    public int getmaxSize() {
        return maxSize;
    }

    public Integer get(Set1 key) {
        int hashVar = Math.abs(key.hashCode() % maxSize);
        Start start = list[hashVar];
        return start.getValue();

    }
    
	/*This is used to convert To BitSet*/
    public BitSet toBit(Integer thresh) {
        BitSet vector = new BitSet(maxSize);

        for (int i = 0; i < maxSize; i++) {
            if (list[i] != null && list[i].getValue() >= thresh) {
                vector.set(i);
            }
        }
        return vector;
    }
	
	/*To convert to a bitvector */   
    public void input(Set1 key, Integer value) {
        int hashVar = Math.abs(key.hashCode() % maxSize);
        Start start = list[hashVar];

        if (start == null) {
        	Start newStart = new Start(key, value);
            list[hashVar] = newStart;         	   
        } else {
        	start.value = value;
        }
    }
	
    /*to checks if hash location has a key entry*/ 
    public boolean haveKey(Set1 key) {
        int hash = Math.abs(key.hashCode() % maxSize);
        return (list[hash] != null);
    }
    
    //to string method to display output on the console
    public String toString() {
        String s;
        s = "{";
        for (Start array : list) {
            if (array != null) {
                s += array;
            }
        }
        s = s+"}";
        return s;
    }

}

APriori Class


import java.io.*;
import java.util.*;
public final class APriori {

    private static int limit;

    //constructor
    private APriori() {
    	limit = 0;
    }

    /*sets the threshold*/
    public static void setThreshold(int max) {
    	limit = max;
    }

    /*finds the frequent pair*/
    public static LinkedHashMap<Set1, Integer> toFindPair(String retail) throws NumberFormatException, IOException {
        
    	HashMap<Integer, Integer> ItemFrequent = AprioriPassOne(retail);
        LinkedHashMap<Set1,Integer> canBeSet = new LinkedHashMap<>();
        String numsLine[],_line = "";
        Integer value;
        ArrayList<Integer> myList = new ArrayList<>();
		FileReader fileReader = new FileReader(retail);
		BufferedReader reader = new BufferedReader(fileReader);
		while ((_line = reader.readLine()) != null) {
			numsLine = _line.split("\\s+");
			for (String num : numsLine) {
				value = Integer.parseInt(num);
				if (ItemFrequent.containsKey(value) && !myList.contains(value)) {
					myList.add(value);
				}
			}
			//use collection class through "java.util.Collections" for sorting
			Collections.sort(myList);
			for (int n = 0; n < myList.size(); n++) {
				for (int k = n + 1; k< myList.size(); k++) {
					Set1<Integer, Integer> val = new Set1<>(myList.get(n), myList.get(k));
					if (canBeSet.containsKey(val)) {
						canBeSet.put(val,
								canBeSet.get(val) + 1);
					} else {
						canBeSet.put(val, 1);
					}
				}
			}
			myList.clear();
		}
		reader.close();
		removeNotFrequent(canBeSet);
        return canBeSet;
    }

    /* to find freq Item Counts*/
    private static HashMap<Integer, Integer> AprioriPassOne(String fileName) throws NumberFormatException, IOException {
        HashMap<Integer, Integer> pass1 = new HashMap<>();

        String numsLine[],_line = "";
        Integer key;
		FileReader readFile = new FileReader(fileName);
		BufferedReader toRead = new BufferedReader(readFile);
		while ((_line = toRead.readLine()) != null) {
			numsLine = _line.split("\\s+");
			for (String num : numsLine) {
				key = Integer.parseInt(num);
				if (pass1.containsKey(key)) {
					pass1.put(key, pass1.get(key) + 1);
				} else {
					pass1.put(key, 1);
				}
			}
		}
		toRead.close();

		removeNotFrequent(pass1);
        return pass1;
    }
	 
	 /*method to check if item is frequent*/
    private static boolean checkFrequancy(int item) {
        return item >= limit;
    }
     /*remove none freq item*/
    private static void removeNotFrequent(HashMap<Integer, Integer> map) {

        Iterator<Map.Entry<Integer, Integer>> check = map.entrySet().iterator();
        while (check.hasNext()) {
            Map.Entry<Integer, Integer> entry = check.next();
            if (!(checkFrequancy(entry.getValue()))) {
                check.remove();
            }
        }
    }
    /*remove none freq pair*/
    private static void removeNotFrequent(LinkedHashMap<Set1, Integer> map) {
        Iterator<Map.Entry<Set1, Integer>> check = map.entrySet().iterator();
        while (check.hasNext()) {
            Map.Entry<Set1, Integer> entry = check.next();
            if (!checkFrequancy(entry.getValue())) {
                check.remove();
            }
        }
    }


}

PCY Class


import java.io.*;
import java.util.*;
public final class PCY {
	
	//Variables
    private static int limit,maxSize = 131072;// set the size to 2^17 
    private static BitSet bit;

    //Setters
    public static void setBucketSize(int bSize) {
        maxSize = bSize;
    }
	
	public static void setLimit(int p ) {
        limit = p ;
    }

    //Link the hash maps
    public static LinkedHashMap<Set1, Integer> toFindPair(String retail) throws NumberFormatException, IOException {
        HashMap<Integer, Integer> ItemFrequent = PCYPassOne(retail);
        LinkedHashMap<Set1, Integer> canBeSet = new LinkedHashMap<>();
        String _line,numberOfLines[];
        Integer key;
        ArrayList<Integer> myList = new ArrayList<>();

        FileReader toReadFile = new FileReader(retail);
		BufferedReader reader = new BufferedReader(toReadFile);
		while ((_line = reader.readLine()) != null) {
			numberOfLines = _line.split("\\s+");
			for (String toNum : numberOfLines) {
				/*convert to integer*/
				key = Integer.parseInt(toNum);
				if (ItemFrequent.containsKey(key) && !myList.contains(key)) {
					myList.add(key);
				}
			}
			/*sort myList*/
			Collections.sort(myList);
			for (int n = 0; n < myList.size(); n++) {
				/*create pairs*/
				for (int k = n + 1; k < myList.size(); k++) {
					Set1<Integer, Integer> val = new Set1<>(myList.get(n), myList.get(k));
					if (!bit.get(Math.abs(val.hashCode()) % maxSize)) {
						continue;
					}
					if (canBeSet.containsKey(val)) {
						canBeSet.put(val, canBeSet.get(val) + 1);
					} else {
						canBeSet.put(val, 1);
					}
				}
			}
			myList.clear();

		}
		reader.close();

		removeNotFrequent(canBeSet);
		/*return frequent pairs and number of support*/
        return canBeSet;
    }

    /* first pass -- to find freq Item Counts */
    private static HashMap<Integer,Integer> PCYPassOne(String fileName) throws NumberFormatException, IOException {
       //Decelerations
    	HashMap<Integer, Integer> pass1 = new HashMap<>();
        Bucket firstBucket = new Bucket(maxSize);
        String totalLine[], _lin = "";
        Integer value;
        ArrayList<Integer> myList = new ArrayList<>();        
		FileReader readFile = new FileReader(fileName);
		BufferedReader reader = new BufferedReader(readFile);
		while ((_lin = reader.readLine()) != null) {
			totalLine = _lin.split("\\s+");
			for (String num : totalLine) {
				value = Integer.parseInt(num);
				if (pass1.containsKey(value)) {
					pass1.put(value, pass1.get(value) + 1);
				} else {
					pass1.put(value, 1);
				}
				if (!myList.contains(value)) {
					myList.add(value);
				}
			}
			Collections.sort(myList);
			for (int n = 0; n < myList.size(); n++) {
				for (int k = n + 1; k < myList.size(); k++) {
					Set1 <Integer, Integer> val = new Set1 < >(myList.get(n), myList.get(k));

					if (firstBucket.haveKey(val)) {
						firstBucket.input(val, firstBucket.get(val) + 1);
					} else {
						firstBucket.input(val, 1);
					}
				}
			}
			myList.clear();
		}
		reader.close();       
		removeNotFrequent(pass1);
        bit = firstBucket.toBit(limit);
        firstBucket = null; 
        return pass1;
    }
    private static boolean checkFrequancy(int num) {
        return num >= limit;
    }
    /*To remove none freq. Pair*/
    private static void removeNotFrequent(HashMap<Integer, Integer> map) {

        Iterator<Map.Entry<Integer, Integer>> check = map.entrySet().iterator();
        while (check.hasNext()) {
            Map.Entry<Integer, Integer> entry = check.next();
            if (!checkFrequancy(entry.getValue())) {
                check.remove();
            }
        }
    }
    /*To remove none freq. Pair*/
    private static void removeNotFrequent(LinkedHashMap<Set1, Integer> map) {

        Iterator<Map.Entry<Set1, Integer>> check = map.entrySet().iterator();
        while (check.hasNext()) {
            Map.Entry<Set1,Integer> entry = check.next();
            if (!checkFrequancy(entry.getValue())) {
                check.remove();
            }
        }
    }
}

Mining Frequent Itemsets

Mining Frequent Itemsets

Mining Frequent Itemsets

About

Leave a reply

Captcha Click on image to update the captcha .

error: Content is protected !!