1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package jr239.co620;
19
20 import org.apache.log4j.Logger;
21
22
23 import weka.classifiers.Evaluation;
24 import weka.core.Instances;
25 import weka.core.Instance;
26 import weka.classifiers.CostMatrix;
27 import weka.classifiers.Classifier;
28 import weka.classifiers.bayes.NaiveBayes;
29 import weka.core.converters.ArffLoader ;
30 import weka.core.FastVector;
31
32 import weka.core.Attribute;
33
34 import weka.gui.LogPanel;
35 import weka.core.converters.ConverterUtils.*;
36 import weka.estimators.EstimatorUtils;
37
38
39 import java.util.Random;
40 import java.util.ArrayList;
41 import java.util.Arrays;
42 import java.io.File;
43 import java.io.IOException;
44
45 import java.util.HashMap;
46 import java.util.HashSet;
47 import java.util.Properties;
48
49 import java.io.FileWriter;
50
51 import java.io.FileReader;
52
53
54 public class SimpleBayesDataProcessor {
55
56
57 protected static SimpleBayesDataProcessor getInstance(LogPanel lw){
58 synchronized(SimpleBayesDataProcessor.class) {
59
60 if (singleInstace == null){
61 wl = lw;
62 singleInstace = new SimpleBayesDataProcessor();
63 }
64 }
65
66 return singleInstace;
67 }
68
69
70
71
72
73
74
75
76 protected static final Instances getInitialDataSet(){
77 return initialDataSet0;
78 }
79
80
81
82
83 protected static final Object[] cehckJAminCount(final int minCountPer100){
84 boolean minCount = true;
85 double r = ((double)minCountPer100)/(100.00 );
86
87 double minC = r * ((double)joinedAttributesDataSet.numInstances());
88 Object[] check = new Object[2];
89 int[] AttrbValuesCounts = null;
90 double normalizedMinCount = -9.99;
91
92 AttrbValuesCounts = joinedAttributesDataSet.attributeStats(jap).nominalCounts;
93 normalizedMinCount = setNormalizedMinCountValue( AttrbValuesCounts);
94
95 for( int valueCountIndex = 0 ; valueCountIndex < AttrbValuesCounts.length; valueCountIndex ++ ){
96 if( AttrbValuesCounts[valueCountIndex] < minC ){
97 minCount = false;
98 break;
99 }
100 }
101 check[0] = minCount;
102 check[1] = normalizedMinCount;
103 return check;
104 }
105
106 protected static int getMinCount(){
107 return minValsCount;
108 }
109
110
111 private static double setNormalizedMinCountValue(int[] AttrbValuesCount ){
112 double normalizedMinCounts= -9.99;
113 HashSet<Integer> uniqueCountsValues = new HashSet<Integer>();
114 for( int i = 0; i < AttrbValuesCount.length; i++ ){
115 uniqueCountsValues.add(AttrbValuesCount[i]);
116 }
117 Integer[] cl = uniqueCountsValues.toArray(new Integer[uniqueCountsValues.size()]);
118 Arrays.sort(cl);
119 normalizedMinCounts = (cl[0] ) / ((double) ( joinedAttributesDataSet.numInstances() ) );
120
121 return normalizedMinCounts;
122 }
123
124
125 private static Attribute
126 getJoinedAttribute( Attribute a, Attribute b){
127
128 int anv = a.numValues(), bnv = b.numValues();
129 FastVector jointAttrbNmeVals = new FastVector(anv*bnv);
130
131 for ( int av= 0; av < anv ; av++){
132 for( int bv = 0; bv < bnv; bv++ ){
133 jointAttrbNmeVals.addElement(a.value( av) + JOIN + b.value(bv) );
134 }
135 }
136
137 return new Attribute ( (a.name() + JOIN + b.name()) ,
138 jointAttrbNmeVals
139 );
140 }
141
142
143
144 private static
145 Attribute getJoinedAtrrbDef (){
146
147 Attribute a = initialDataSetAttributesDefinition [ attrsToJoinIndxs[ 0 ] ];
148 Attribute b = initialDataSetAttributesDefinition [ attrsToJoinIndxs[ 1] ];
149 joinedAttrb = getJoinedAttribute(a,b);
150
151 for( int i = 2; i < ajn ; i++){
152
153 a = joinedAttrb ;
154 b = initialDataSetAttributesDefinition [ attrsToJoinIndxs[ i ] ];
155 joinedAttrb = getJoinedAttribute(a,b);
156 }
157 return joinedAttrb;
158 }
159
160 private static void
161 setIndxsAttrbsToJoin (int [] ix ){
162 ajn = ix.length;
163 attrsToJoinIndxs = new int[ajn];
164 for(int i = 0; i < ajn ; i++){
165 attrsToJoinIndxs[i] = ix[i];
166 }
167
168 }
169
170
171 private static boolean joinedAttribute(int AttrIndx){
172
173 for(int i =0; i < ajn; i++ ){
174 if (AttrIndx ==attrsToJoinIndxs[i]) {
175 return true;
176 }
177 }
178 return false;
179 }
180
181
182 private static void
183 setJoinedAttributesDefinition (int[] joinIndxs ){
184
185 setIndxsAttrbsToJoin(joinIndxs);
186
187 jna = ina - ajn +1;
188 joinedAttributesDefinition = new FastVector(jna);
189 joinedAttributesDefinition.insertElementAt(( getJoinedAtrrbDef()), jap);
190
191 for( int i = 1; i < jna; i++){
192 for(int j = 0; j < ina; j++){
193 if( !joinedAttribute(j)){
194 joinedAttributesDefinition.insertElementAt(initialDataSetAttributesDefinition[j], i);
195 i++;
196 }
197 }
198 }
199 }
200
201
202 private static Instance
203 getJoinedAttribIndexedValues(String[] joinedValAsString){
204
205 double[] indexedVals = new double[jna];
206
207 for (int i = 0; i < jna; i++){
208
209
210 int index = joinedAttributesDataSet.attribute(i).indexOfValue(joinedValAsString[i]);
211
212 assert index > -1:"nominal value not declared in header" ;
213
214
215
216 indexedVals[i] = (double)index;
217
218
219 }
220 Instance iv = new Instance( 1, indexedVals );
221 iv.setDataset(joinedAttributesDataSet);
222 return iv;
223 }
224
225
226 private static String
227 getJoinedValues(int indx){
228 String jv =initialAttributesStringValsSet[indx][attrsToJoinIndxs[0]];
229 for ( int i = 1; i < ajn; i++){
230 jv =jv + JOIN +initialAttributesStringValsSet[indx][attrsToJoinIndxs[i]];
231 }
232 return jv;
233 }
234
235
236 static int getFolds(){
237 return foldingsNum;
238 }
239
240 private static void
241 setJoinedAttributesStringValues( ){
242
243 joinedStringValsSet = new String[rowsNumber][jna];
244
245 for(int i = 0; i < rowsNumber; i++){
246
247 String[] theJoinedAttributeValues = new String[jna];
248 theJoinedAttributeValues[jap] = getJoinedValues(i);
249
250 for(int j = 1; j < jna; j ++){
251 for(int k = 0; k < ina; k++){
252 if( ( !joinedAttribute(k)) ){
253 theJoinedAttributeValues[j] = initialAttributesStringValsSet [i][k];
254 j++;
255 }
256
257 }
258 }
259 joinedStringValsSet[i] = theJoinedAttributeValues;
260 }
261
262 }
263
264
265 private static void
266 setInitialAttributeStringValues() {
267 initialAttributesStringValsSet = new String [rowsNumber][ina];
268 for(int row= 0;row< rowsNumber; row ++){
269 String[] theInitialAttributeStringValues = new String[ina];
270 Instance initialAttrbIndexedVals = initialDataSet.instance(row);
271
272 for(int column = 0; column < ina; column ++){
273 int AttrbuteStringValueIndex = (int )initialAttrbIndexedVals.value(column);
274 theInitialAttributeStringValues[column] = AttributeIndexToStringValueMapsList.get(column).get(AttrbuteStringValueIndex);
275
276
277 }
278 initialAttributesStringValsSet[row] = theInitialAttributeStringValues;
279 }
280 }
281
282
283
284 private static void
285 setInitialAttributesDefinition (){
286 HashMap<Integer, String> attrbuteIndexToStringValueMap =null;
287 int attrbValuesNumber =0;
288 AttributeIndexToStringValueMapsList = new ArrayList< HashMap<Integer, String> >(ina);
289 initialDataSetAttributesDefinition= new Attribute[ina] ;
290
291 for(int attributeIndex = 0; attributeIndex <ina; attributeIndex ++){
292 initialDataSetAttributesDefinition[attributeIndex] = initialDataSet.attribute(attributeIndex);
293 attrbValuesNumber = initialDataSetAttributesDefinition[attributeIndex].numValues();
294 attrbuteIndexToStringValueMap = new HashMap<Integer, String> (attrbValuesNumber);
295
296 for(int attrbVal = 0; attrbVal< attrbValuesNumber; attrbVal++ ){
297 attrbuteIndexToStringValueMap.put( attrbVal , initialDataSetAttributesDefinition[attributeIndex].value(attrbVal) );
298 }
299
300 AttributeIndexToStringValueMapsList.add(attributeIndex, attrbuteIndexToStringValueMap);
301 }
302 initialAttrbsValuesCounters = getAttrbValuesCounts(initialDataSet);
303 }
304
305
306
307
308 private static ArrayList <ArrayList <Integer>> getAttrbValuesCounts (Instances dataSet) {
309
310 ArrayList <ArrayList <Integer>> attrbsValuesCounters = getInitAttrbsValsCounters(dataSet);
311 int rows = dataSet.numInstances(),
312 columns = dataSet.numAttributes();
313 for(int tupleNumb = 0; tupleNumb < rows; tupleNumb++){
314
315 for(int attrbNum = 0; attrbNum <columns ; attrbNum++){
316
317 int attrbNumVals=dataSet.attribute(attrbNum).numValues() ;
318 for(int attrValNum = 0;attrValNum < attrbNumVals ;attrValNum++) {
319
320 if(
321 dataSet.instance(tupleNumb).stringValue(attrbNum).equalsIgnoreCase(dataSet.attribute(attrbNum).value(attrValNum))
322 ){
323 Integer attrValCount = attrbsValuesCounters.get(attrbNum).get(attrValNum);
324 attrValCount = attrValCount + 1;
325 attrbsValuesCounters.get(attrbNum).set(attrValNum, attrValCount);
326 }
327
328 }
329
330 }
331
332 }
333
334 return attrbsValuesCounters;
335 }
336
337
338 protected static boolean checkJoinedAttributeValsMinCount( ){
339
340 ArrayList <ArrayList <Integer>> ac = getAttrbValuesCounts (joinedAttributesDataSet);
341 ArrayList <Integer> a = ac.get(jap);
342 for (Integer val : a){
343 if ( val < minValsCount){
344 lg.info("The count for value: " + joinedAttrb.value(a.indexOf(val) ) + "is: \"" + val +"\" \n" );
345 return false;
346 }
347 }
348 return true;
349 }
350
351 private static void setAttrbValsCounts( ArrayList <Integer>attrbValsCounts, int attrbValsNumber){
352 Integer Z= 0;
353 int counts = attrbValsNumber;
354 for (int i =0; i < counts ; i++ ){
355 attrbValsCounts.add(Z);
356 }
357 }
358
359
360
361 private static ArrayList <ArrayList <Integer>> getInitAttrbsValsCounters(Instances dataSet){
362 int attrbsNum = dataSet.numAttributes();
363 ArrayList <ArrayList <Integer>> counters = new ArrayList <ArrayList <Integer>> (attrbsNum);
364 int attrbValsNumber = 0;
365 for(int z = 0; z < attrbsNum; z++){
366 attrbValsNumber = dataSet.attribute(z).numValues() ;
367 ArrayList <Integer> AttrValsCounts = new ArrayList <Integer>( attrbValsNumber);
368 setAttrbValsCounts(AttrValsCounts , attrbValsNumber);
369 counters.add(AttrValsCounts);
370 }
371 return counters;
372 }
373
374
375
376
377
378 private static void setClasIndex(){
379 classIndex = ina -1;
380 }
381
382
383
384 private static void
385 setInitialDataSetFromArrfFile(String arrfFileNme){
386 String fileName="data/t0.arff";
387 if(!arrfFileNme.equalsIgnoreCase("")) {
388 fileName = arrfFileNme;
389 }
390 File fil = new File(fileName);
391 ArffLoader L = new ArffLoader();
392 try {
393 L.setSource(fil);
394 initialDataSet = new Instances(L.getDataSet()) ;
395 initialDataSet0 = new Instances( initialDataSet );
396 ina = initialDataSet.numAttributes();
397 setClasIndex();
398 initialDataSet.setClassIndex(classIndex);
399 initialDataSet0.setClassIndex(classIndex);
400 rowsNumber = initialDataSet.numInstances();
401 SetXvalidtnFoldsNum();
402 }catch ( IOException e) {
403
404 System.err.println(" arff file not found:" + arrfFileNme +"\n" );
405 System.exit(1);
406 }
407
408 searchProperties=new Properties();
409 try{
410 FileReader rd = new FileReader(propertiesFilename);searchProperties.load(rd);
411 runID=Integer.parseInt((String)searchProperties.get("runNo"));
412 writtenFileNo =Integer.parseInt((String)searchProperties.get("writtenFileNo"));
413 minValsCount = Integer.parseInt((String)searchProperties.get("minValsCount"));
414 rd.close();
415 }catch(Exception e){
416 System.err.println(e.getMessage());
417 }
418
419 SLBD = SLBD_back + (getJAstring(initialDataSet0.relationName() ))+ runID+"/";
420 runID++;
421
422
423 initialDataSet0ClassifierEvaluation = getDataSetClassifierEvaluation(initialDataSet0);
424 setInitialAttributesDefinition ();
425 setMajorityClassCounts();
426
427 }
428
429
430
431
432 private static void setMajorityClassCounts(){
433 initialDataSet0MaxClassValattrValsCounts = setAttributesValuesCounts(
434 maxClassValDataSet0 = getMajorityClassValueDataSet(initialDataSet0)
435 );
436 Integer [] kl = initialDataSet0MaxClassValattrValsCounts.keySet().toArray(new Integer [0]);
437 initialDataSet0MaxClassValAttrValsSortedList = new Double[kl.length];
438 for(int i =0; i < kl.length; i++){
439 initialDataSet0MaxClassValAttrValsSortedList[i]= (double)kl[i];
440 }
441
442
443 Arrays.sort(initialDataSet0MaxClassValAttrValsSortedList);
444 initialDataSet0MaxClassValAttrValsSortedListNormalized = Arrays.copyOf( initialDataSet0MaxClassValAttrValsSortedList, initialDataSet0MaxClassValAttrValsSortedList.length);
445 for(int c = 0 ; c < initialDataSet0MaxClassValAttrValsSortedListNormalized.length; c++){
446 initialDataSet0MaxClassValAttrValsSortedListNormalized[c] = (
447 initialDataSet0MaxClassValAttrValsSortedListNormalized[c]
448 /
449 (1.000 * maxClassValDataSet0.numInstances() )
450 );
451 }
452 }
453
454
455 protected static final Double[][] getBaseLineMaxClassCounts(){
456 Double[][] valsCounts ={
457
458 initialDataSet0MaxClassValAttrValsSortedList,
459
460 initialDataSet0MaxClassValAttrValsSortedListNormalized
461 };
462 return valsCounts;
463 }
464
465 private static void
466 setJoinedAttrbIndexedVals(){
467 for(int i = 0; i < rowsNumber; i++){
468 joinedAttributesDataSet.add(getJoinedAttribIndexedValues(joinedStringValsSet[i]));
469 }
470 }
471
472
473 protected static void SetXvalidtnFoldsNum(){
474 if( rowsNumber > 49) {
475 foldingsNum = 10;
476 }
477 else if (rowsNumber < 10) {
478 foldingsNum = 2;
479 }
480 else {
481 foldingsNum =( 1 + (rowsNumber / 10) );
482 }
483 }
484
485
486 private static Evaluation getDataSetClassifierEvaluation(Instances dataSet){
487 Evaluation e = null;
488 try{
489 e = new Evaluation( dataSet, new CostMatrix(dataSet.attribute(dataSet.classIndex()).numValues()));
490 e.crossValidateModel(
491 classifierSB , dataSet, foldingsNum, randomIntGenerator
492 );
493 }catch( Exception ex){
494 System.err.println("something wrong with data set read from" + ex);
495 System.exit(AT);
496 }
497 return e;
498 }
499
500
501 private static void testCase( int[] tuples, String[][] expectedVals){
502 for ( int i =0 ; i < tuples.length; i++){
503 for(int j = 0 ; j < jna; j ++){
504 assert joinedStringValsSet[tuples[i]][ j].endsWith(expectedVals[i][j]);
505 }
506 }
507 }
508
509
510 protected static void setJoinedAttributesValuesTest(String arrFileName) {
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549 int[] indexes1 = {1,2};
550
551 int[] Tuple ={ 0, 7, 13};
552 String[][] expected2 ={
553 {"b1-c1","a1","d2","x2"} ,
554 { "b2-c1","a1","d2","x2"} ,
555 {"b2-c1","a3","d1","x2"}
556 };
557 setInitialDataSetFromArrfFile( arrFileName);
558 setInitialAttributeStringValues();
559 computeJoinedAttrbsWorkingDataSet (indexes1);
560
561
562
563
564 assert joinedStringValsSet.length == rowsNumber: joinedStringValsSet.length ;
565 assert joinedStringValsSet[3].length == jna;
566 testCase(Tuple, expected2);
567 System.out.println(" \n joined2 \n" + joinedAttributesDataSet.toString() );
568
569
570 int[] index2 = {1,2,3};
571 setIndxsAttrbsToJoin(index2);
572 String[][] expected3={
573 {"b1-c1-d2","a1","x2" },
574 { "b2-c1-d2","a1","x2"},
575 { "b2-c1-d1","a3","x2"}
576 };
577
578 computeJoinedAttrbsWorkingDataSet (index2);
579
580 assert joinedStringValsSet.length == rowsNumber: joinedStringValsSet.length ;
581 assert joinedStringValsSet[3].length == jna;
582 testCase(Tuple, expected3);
583 System.out.println(" \n joined3 \n" + joinedAttributesDataSet.toString() );
584
585 }
586
587
588 private static String getJoinedAttrbsDataSetName(){
589 StringBuilder n = new StringBuilder( attrsToJoinIndxs.length + "JA[" );
590 for (int i =0 ; i < attrsToJoinIndxs.length-1 ; i++){
591 n.append(attrsToJoinIndxs[i]+"-");
592 }
593 n.append( (attrsToJoinIndxs[attrsToJoinIndxs.length-1] )+ "] Atrb Name=[" );
594 for (int i =0 ; i < attrsToJoinIndxs.length-1 ; i++){
595 n.append(initialDataSet.attribute(attrsToJoinIndxs[i]).name()+ "-");
596 }
597 n.append(initialDataSet.attribute(attrsToJoinIndxs[attrsToJoinIndxs.length-1]).name()+ "] From:");
598 n.append(initialDataSet.relationName()+"\n");
599
600
601 return n.toString();
602 }
603
604 private static int getJoinedAttrbDataSetClassIndex(){
605 return jna -1;
606 }
607
608
609 private static void
610 setJoinedAttrbsTestingDataSet (int[] joinIndxs){
611
612 setJoinedAttributesDefinition ( joinIndxs);
613 joinedAttributesDataSet = new Instances(
614 getJoinedAttrbsDataSetName(),
615 joinedAttributesDefinition, 1
616 );
617 joinedAttributesDataSet.setClassIndex(getJoinedAttrbDataSetClassIndex());
618 setJoinedAttributesStringValues ();
619 setJoinedAttrbIndexedVals();
620
621 }
622
623 private static int antSearchNo=0;
624 private static String actxt="";
625 protected static void setAntContext(String c){
626
627 actxt = c;
628 }
629
630 protected static void setAntid(int antNum){
631 antSearchNo = antNum;
632 }
633
634 protected static final String getSBS(){
635 return SLBD;
636 }
637
638 private static boolean badChar(char c){
639 boolean bc = false;
640 if (!Character.isDigit(c) && !Character.isLetter(c)){
641 bc = true;
642 }
643 return bc;
644 }
645
646 private static String getJAstring(String jaNme){
647 String n= jaNme.trim();
648 char[] bch = new char[6];
649 int bc= 0;
650 if(n.length() >6 ){
651 n =n.substring( 0, 6);
652 }
653 for(int x = 0 ; x< n.length(); x++ ) {
654 if (badChar(n.charAt(x))) {
655 bch[bc] = n.charAt(x);
656 bc++;
657 }
658 }
659 for(int r = 0 ; r<bc; r++){
660 n = n.replace(bch[r], 'M');
661 }
662
663
664 return n;
665 }
666
667 protected static void
668 computeJoinedAttrbsWorkingDataSet (int[] joinIndxs){
669 String d = SLBD+"AS/", e="-FID-"+writtenFileNo+"No.arff";
670 writtenFileNo++;
671 setJoinedAttributesDefinition ( joinIndxs);
672 joinedAttributesDataSet = new Instances(
673 getJoinedAttrbsDataSetName(),
674 joinedAttributesDefinition, 1
675 );
676 joinedAttributesDataSet.setClassIndex(getJoinedAttrbDataSetClassIndex());
677 setJoinedAttributesStringValues ();
678
679 setJoinedAttrbIndexedVals();
680
681 d=d+"ant"+antSearchNo+"/JA"+actxt+
682 getJAstring(joinedAttributesDataSet.attribute(jap).name())
683 +e;
684
685 saveDataSet(d, joinedAttributesDataSet);
686
687 joinedAttributesDataSetClassifierEvaluation = getDataSetClassifierEvaluation(joinedAttributesDataSet);
688
689
690
691 if(rowsNumber < 1){
692 lw("\nJoined Attributes Datset created is:\n"+ joinedAttributesDataSet.toSummaryString()
693 + "\n --End of Joined Attributes Datset created details--\n"
694 );
695 }
696 antSearchNo++;
697
698 }
699
700
701 private static volatile SimpleBayesDataProcessor singleInstace = new SimpleBayesDataProcessor();
702 private SimpleBayesDataProcessor(){
703
704 }
705
706
707 protected static void initColonyEvaluationFolds(){
708 initialDatatSetFolded = new Instances(initialDataSet);
709 initialDatatSetFolded.randomize(randomIntGenerator);
710 initialDatatSetFolded.stratify(foldingsNum);
711
712 }
713
714
715
716
717 private static Instances initialTrainingDataSet =null;
718 protected static void setACOworkingSetFold(int workingFoldNumber){
719 initialTrainingDataSet = new Instances (
720 initialDatatSetFolded.trainCV( foldingsNum , workingFoldNumber, randomIntGenerator )
721 );
722 initialDataSet = initialTrainingDataSet ;
723 rowsNumber = initialDataSet.numInstances();
724 setInitialAttributeStringValues();
725 }
726
727
728
729
730
731
732
733
734
735 private static Instances colonyJoinedAttributesDataSet =null;
736 protected static Instances getACOjoinedAttribsTrainingDataSet(){
737
738 return colonyJoinedAttributesDataSet ;
739
740 }
741
742
743
744
745
746
747 protected static final EvaluationACO setAC0TestingSetFoldGetColonySearchEvaluation( int workingFoldNumber, int[] AC0joinedAttributes){
748 NaiveBayes bCAco = new NaiveBayes();
749 String dir="", dirb =SLBD+"COS/", e = "+-FID-"+ writtenFileNo+"No.arff";
750 writtenFileNo++;
751
752
753
754 initialDataSet = initialDatatSetFolded.testCV(foldingsNum, workingFoldNumber);
755 rowsNumber = initialDataSet.numInstances();
756 setInitialAttributeStringValues();
757 setJoinedAttrbsTestingDataSet(AC0joinedAttributes);
758 Instances joinedAttributesTestingDataSet= new Instances(joinedAttributesDataSet);
759
760 dir=dirb+"JA"+
761 getJAstring( joinedAttributesTestingDataSet.attribute(0).name() )+
762 "F"+workingFoldNumber+"/";
763
764 saveDataSet(dir+"testDsFd"+workingFoldNumber+e, joinedAttributesTestingDataSet );
765
766 initialDataSet = initialTrainingDataSet;
767 rowsNumber = initialDataSet.numInstances();
768 setInitialAttributeStringValues();
769 setJoinedAttrbsTestingDataSet(AC0joinedAttributes);
770 Instances joinedAttribsTrainingDataSet = new Instances(joinedAttributesDataSet);
771 saveDataSet(dir+"traDsFd"+workingFoldNumber+e, joinedAttribsTrainingDataSet );
772
773 initialDataSet = new Instances ( initialDataSet0 );
774 rowsNumber = initialDataSet.numInstances();
775 setInitialAttributeStringValues();
776 setJoinedAttrbsTestingDataSet(AC0joinedAttributes);
777 colonyJoinedAttributesDataSet = new Instances(joinedAttributesDataSet);
778 saveDataSet(dir+"jaIDSfullFd"+workingFoldNumber+e, colonyJoinedAttributesDataSet );
779
780
781
782
783
784
785
786
787
788
789 try{
790 colonyEvaluation = new EvaluationACO( joinedAttributesDataSet , new CostMatrix(joinedAttributesDataSet.numClasses())
791 );
792 colonyEvaluation.validateACOmodel(bCAco, joinedAttribsTrainingDataSet , joinedAttributesTestingDataSet);
793 } catch( Exception ex){
794 System.err.println("something wrong with data set read from" + ex);
795 }
796
797
798
799 return colonyEvaluation;
800 }
801
802
803 protected static int getInitalDataSetAttribsNumber(){
804 return ina;
805 }
806
807
808 protected static Evaluation getBaslineEvaluation(){
809
810 return initialDataSet0ClassifierEvaluation;
811 }
812
813
814 protected static Evaluation getClassifierEnginePerformanceOnJoinedAttributesDataset (){
815 return joinedAttributesDataSetClassifierEvaluation;
816 }
817
818 protected static Classifier getClassifierEngine(){
819
820 return classifierSB;
821 }
822
823 protected void getAttValsCountTest(String dataFle){
824 setInitialDataSetFromArrfFile(dataFle);
825 initialAttrbsValuesCounters.get(4).get(0);
826 initialAttrbsValuesCounters.get(3).get(1);
827 }
828
829 protected static void processDataFromSource(String dataSetFile){
830
831 setInitialDataSetFromArrfFile(dataSetFile);
832 }
833
834 private static void lw (String output){
835 wl.logMessage(output);
836 }
837
838
839 private static void saveDataSet(final String file ,final Instances ds){
840 arffSaver.setInstances(ds);
841 try{
842
843 arffSaver.setFile(new File( file));
844 arffSaver.writeBatch();
845 }catch(java.lang.Exception e){
846 StringBuilder m = new StringBuilder("System cannot save dataset:\n" + ds.toSummaryString()+"\n to file:\n "+ file);
847 lg.warn(m);
848 System.err.println(m);
849 }
850 }
851
852
853 private static Instances getMajorityClassValueDataSet(Instances dataSet){
854 classIndex = dataSet.classIndex();
855 if(classIndex !=dataSet.numAttributes() -1 ) {
856 System.err.println("Class is hardcoded into the last colunm at getMajorityClassValueDataSet(Instances dataSet) \n");
857 System.exit(AT);
858 }
859 int [] classValsCounts= dataSet.attributeStats(classIndex).nominalCounts;
860
861 HashMap<Integer,Integer> CountToValindex = new HashMap<Integer,Integer>(classValsCounts.length);
862 for ( int c = 0; c< classValsCounts.length ; c++){
863 CountToValindex.put(classValsCounts[c], c);
864 }
865 Arrays.sort(classValsCounts);
866 Integer maxClassVal =(CountToValindex.get( classValsCounts[classValsCounts.length -1]));
867
868
869 Instances maxClassValDataSet =EstimatorUtils.getInstancesFromClass(dataSet, classIndex, maxClassVal);
870 return maxClassValDataSet;
871 }
872
873
874 private static HashMap<Integer, HashMap<Integer, HashSet<Integer>>>
875 setAttributesValuesCounts(Instances dataSet){
876 HashMap<Integer, HashMap<Integer, HashSet<Integer>>> counts = new HashMap<Integer, HashMap<Integer, HashSet<Integer>>>();
877 for(int row = 0; row < dataSet.numInstances(); row ++){
878 for ( int col = 0; col < dataSet.numAttributes()-1; col ++){
879 int[] attrbValsCounts = dataSet.attributeStats(col).nominalCounts;
880
881 for( int valindex= 0; valindex < attrbValsCounts.length; valindex ++ ){
882
883 if ( !counts.containsKey(attrbValsCounts[valindex])){
884
885 HashSet<Integer> attrbVals = new HashSet<Integer>();
886 attrbVals.add(valindex);
887
888 HashMap<Integer, HashSet<Integer>> attrbToVal = new HashMap<Integer, HashSet<Integer>>();
889 attrbToVal.put(col, attrbVals);
890 counts.put( attrbValsCounts[valindex] , attrbToVal );
891 }else{
892 if(!counts.get(attrbValsCounts[valindex]).containsKey(col)) {
893 HashMap<Integer, HashSet<Integer>> aTv = counts.get(attrbValsCounts[valindex]);
894 HashSet<Integer> aVals=new HashSet<Integer>();
895 aVals.add(valindex);
896 aTv.put(col, aVals);
897 counts.put( attrbValsCounts[valindex] , aTv );
898 }else{
899 HashMap<Integer, HashSet<Integer>> aTv = counts.get(attrbValsCounts[valindex]);
900 HashSet<Integer> aVals=aTv.get(col);
901 aVals.add(valindex);
902 aTv.put(col, aVals);
903 counts.put( attrbValsCounts[valindex] , aTv );
904 }
905 }
906 }
907 }
908 }
909
910 return counts;
911 }
912
913
914 protected static final HashMap<Integer, HashMap<Integer, HashSet<Integer>>> getbaseLineCountToAttMap(){
915
916 return initialDataSet0MaxClassValattrValsCounts;
917 }
918
919 protected static void finalSerchUpdateSearchPropertiesFile(){
920 searchProperties.put("runNo", Integer.toString(( new Integer (runID))));
921 searchProperties.put("runNo", Integer.toString(( new Integer (writtenFileNo))));
922 try{
923 boolean append =true;
924 FileWriter w = new FileWriter(propertiesFilename,append);
925 searchProperties. store(w, "updates");
926 w.close();
927 }catch(IOException e){
928 System.err.println(e.getMessage());
929 }
930
931 }
932
933
934
935
936
937
938
939
940 protected static final Logger lg= Logger.getLogger(SimpleBayesDataProcessor.class);
941
942 private static String JOIN = "-" ;
943
944
945
946 private static Instances initialDataSet = null;
947 private static Attribute[] initialDataSetAttributesDefinition=null;
948 private static String [][] initialAttributesStringValsSet = null;
949
950
951 private static Instances initialDatatSetFolded = null;
952
953 private static Instances initialDataSet0 = null;
954 private static EvaluationACO colonyEvaluation= null;
955
956
957 private static Instances joinedAttributesDataSet = null;
958 private static FastVector joinedAttributesDefinition = null;
959 private static String[][] joinedStringValsSet = null;
960 private static Attribute joinedAttrb = null;
961 private static Evaluation joinedAttributesDataSetClassifierEvaluation = null;
962
963 private static int jna = 0;
964 private static int ina = 0;
965 private static int rowsNumber = 0;
966 private static int ajn = 0;
967 private static int jap = 0 ;
968 private static int classIndex = -1;
969
970 private static int[] attrsToJoinIndxs = null;
971
972 private static Evaluation initialDataSet0ClassifierEvaluation = null;
973
974 private static NaiveBayes classifierSB = new NaiveBayes();
975 private static final Random randomIntGenerator = new Random();
976
977 protected static int foldingsNum = 0;
978 private static LogPanel wl =null;
979 private static int AT= 1;
980
981 private static ArrayList <ArrayList <Integer>> initialAttrbsValuesCounters = null;
982
983 private static ArrayList< HashMap<Integer, String> > AttributeIndexToStringValueMapsList =null;
984
985 private static HashMap<Integer, HashMap<Integer, HashSet<Integer>>> initialDataSet0MaxClassValattrValsCounts = null;
986
987 private static Double [] initialDataSet0MaxClassValAttrValsSortedList =null;
988
989 private static Instances maxClassValDataSet0 = null;
990 private static Double [] initialDataSet0MaxClassValAttrValsSortedListNormalized = null;
991
992
993 private static String SLBD ="S/", SLBD_back = "S/";
994
995 private static String system = Main.system;
996 private static String propertiesFilename = system +"search.properties";
997 private static Properties searchProperties = null;
998
999 private static int minValsCount =0;
1000 private static int writtenFileNo=-10;
1001 private static int runID = -99;
1002
1003 private static weka.core.converters.ArffSaver arffSaver = new weka.core.converters.ArffSaver();
1004 }
1005