diff --git a/src/com/jgaap/backend/API.java b/src/com/jgaap/backend/API.java
index 87630a6ed..62b1faeae 100644
--- a/src/com/jgaap/backend/API.java
+++ b/src/com/jgaap/backend/API.java
@@ -1,849 +1,905 @@
-/*
- * JGAAP -- a graphical program for stylometric authorship attribution
- * Copyright (C) 2009,2011 by Patrick Juola
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as
- * published by the Free Software Foundation, either version 3 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see .
- */
-package com.jgaap.backend;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.log4j.Logger;
-
-import com.jgaap.classifiers.LeaveOneOutNoDistanceDriver;
-import com.jgaap.generics.AnalysisDriver;
-import com.jgaap.generics.AnalyzeException;
-import com.jgaap.generics.CanonicizationException;
-import com.jgaap.generics.Canonicizer;
-import com.jgaap.generics.DistanceFunction;
-import com.jgaap.generics.EventCuller;
-import com.jgaap.generics.EventCullingException;
-import com.jgaap.generics.EventDriver;
-import com.jgaap.generics.EventGenerationException;
-import com.jgaap.generics.Language;
-import com.jgaap.generics.LanguageParsingException;
-import com.jgaap.generics.NeighborAnalysisDriver;
-import com.jgaap.generics.NonDistanceDependentAnalysisDriver;
-import com.jgaap.generics.ValidationDriver;
-import com.jgaap.generics.WEKAAnalysisDriver;
-import com.jgaap.languages.English;
-import com.jgaap.util.Document;
-import com.jgaap.util.EventSet;
-
-/**
- *
- * This class provides a simple interface into jgaap for use in
- * other software packages and for development of any human interfaces.
- *
- * Instructions for using the JGAAP API:
- *
- * First add documents both known and unknown
- *
- * All other settings can be performed in any order which are setLanguage, addCanonicizer,
- * addEventDriver, addEventCuller, addAnalysisDriver, addDistanceFunction
- * Note: of the settings only one EventDriver and one AnalysisDriver are required to run an experiment
- *
- * The execute method is then used to start the experiment running
- *
- * Results are placed in unknown documents to access them simple use the getUnknownDocuments method in the API
- * The results can be retrieved as a List> this is a sorted list
- * from most likely to least likely author followed by a score generated based on your settings using the getRawResult method
- * You can also get a Map of Maps of the raw results (Map>>) with the getRawResults method
- * They can also be retrieved as a string using either the getFormattedResult or getResult methods.
- *
- * For examples of how to use the API class see the com.jgaap.ui package for a GUI example
- * or the com.jgaap.backend.CLI class for a command line example
- *
- * @author Michael Ryan
- * @since 5.0.0
- */
-public class API {
-
- static Logger logger = Logger.getLogger(API.class);
-
- private List documents;
- private Language language;
- private List eventDrivers;
- private List eventCullers;
- private List analysisDrivers;
-
- private ExecutorService executor;
-
- private static final API INSTANCE = new API();
-
- private API() {
- documents = new ArrayList();
- language = new English();
- eventDrivers = new ArrayList();
- eventCullers = new ArrayList();
- analysisDrivers = new ArrayList();
- }
-
- /**
- * This allows a singleton of the api to be used in the gui
- * or any program that needs to access a single copy of JGAAP
- * from multiple classes
- *
- * @return a reference to the singleton API
- */
- public static API getInstance(){
- return INSTANCE;
- }
-
- /**
- * This is a unique instance of the api to be used when running
- * bulk experiments and you want to reset everything or if you
- * want to thread running more than one experiment at a time
- * as in the class com.jgaap.backend.ExperimentEngine
- *
- * @return a unique API instance
- */
- public static API getPrivateInstance(){
- return new API();
- }
-
- /**
- *
- * This allows for the addition of documents to the system.
- * Both Training (known) and Sample (unknown) documents must be provided before running an experiment.
- * Training Documents are added by providing an author(tag) for them.
- * Sample documents are added when no author(tag) is given.
- *
- * @param filepath - the system file path or URL to a document
- * @param author - the author of this document or the tag being applied to this document, if null or the empty string this document is considered unknown and is one of those classified
- * @param title - Some means of identifying the document, if null or the empty string are provided a title will be generated from the file name
- * @return - a reference to the document generated
- * @throws Exception - if there is a problem loading the document from file web or parsing file format
- */
- public Document addDocument(String filepath, String author, String title)
- throws Exception {
- Document document = new Document(filepath, author, title);
- return addDocument(document);
- }
-
- /**
- * Adds a previously generated document to the jgaap system.
- *
- * @param document - a file that has already been loaded as a Document
- * @return - a reference to the document generated
- */
- public Document addDocument(Document document) {
- documents.add(document);
- logger.info("Adding Document "+document.toString());
- return document;
- }
-
- /**
- * Removes a document from the system.
- *
- * @param document - a reference to the document that is to be removed
- * @return - true on success false on failure
- */
- public Boolean removeDocument(Document document) {
- logger.info("Removing Document "+document.toString());
- return documents.remove(document);
- }
-
- /**
- * Removes all documents loaded into the system.
- */
- public void removeAllDocuments() {
- logger.info("Removing all Documents");
- documents.clear();
- }
-
- /**
- * Get a List of all Documents currently loaded into jgaap
- *
- * @return - a List of Documents loaded into the system
- */
- public List getDocuments() {
- return documents;
- }
-
- /**
- * Get a List of all currently loaded Documents that do not have an author(tag)
- *
- * @return List of Documents without authors
- */
- public List getUnknownDocuments() {
- List unknownDocuments = new ArrayList();
- for (Document document : documents) {
- if (!document.isAuthorKnown()) {
- unknownDocuments.add(document);
- }
- }
- return unknownDocuments;
- }
-
- /**
- * Get a List of Documents currently loaded into the system that have a author(tag)
- *
- * @return List of Documents with authors
- */
- public List getKnownDocuments() {
- List knownDocuments = new ArrayList();
- for (Document document : documents) {
- if (document.isAuthorKnown()) {
- knownDocuments.add(document);
- }
- }
- return knownDocuments;
- }
-
- /**
- * Get a List of Documents that all have the same author(tag)
- *
- * @param author - the author(tag) to select documents on
- * @return - List of Documents limited by the author provided
- */
- public List getDocumentsByAuthor(String author) {
- List authorDocuments = new ArrayList();
- for (Document document : documents) {
- if (document.isAuthorKnown()) {
- if (author.equalsIgnoreCase(document.getAuthor())) {
- authorDocuments.add(document);
- }
- }
- }
- return authorDocuments;
- }
-
- /**
- * Get a List of all unique authors(tags) applied to Known(Training) Documents
- *
- * @return List of authors
- */
- public List getAuthors() {
- Set authors = new HashSet();
- for (Document document : documents) {
- if (document.isAuthorKnown()) {
- authors.add(document.getAuthor());
- }
- }
- List authorsList = new ArrayList(authors);
- Collections.sort(authorsList);
- return authorsList;
- }
-
- /**
- * Loads the documents from the file system
- * @throws Exception
- */
- public void loadDocuments() throws Exception{
- for(Document document : documents){
- document.load();
- }
- }
-
- /**
- * Adds the specified canonicizer to all documents currently loaded in the system.
- *
- * @param action - the unique string name representing a canonicizer (displayName())
- * @return - a reference to the canonicizer added
- * @throws Exception - if the canonicizer specified cannot be found or instanced
- */
- public Canonicizer addCanonicizer(String action) throws Exception {
- Canonicizer canonicizer = Canonicizers.getCanonicizer(action);
- for (Document document : documents) {
- addCanonicizer(canonicizer, document);
- }
- return canonicizer;
- }
-
- /**
- * Adds the specified canonicizer to all Documents that have the DocType docType.
- *
- * @param action - the unique string name representing a canonicizer (displayName())
- * @param docType - The DocType this canonicizer is restricted to
- * @return - a reference to the canonicizer added
- * @throws Exception - if the canonicizer specified cannot be found or instanced
- */
- public Canonicizer addCanonicizer(String action, Document.Type docType) throws Exception {
- Canonicizer canonicizer = Canonicizers.getCanonicizer(action);
- for (Document document : documents) {
- if (document.getDocType().equals(docType)) {
- addCanonicizer(canonicizer, document);
- }
- }
- return canonicizer;
- }
-
- /**
- * Add the Canonicizer specified to the document referenced.
- *
- * @param action - the unique string name representing a canonicizer (displayName())
- * @param document - the Document to add the canonicizer to
- * @return - a reference to the canonicizer added
- * @throws Exception - if the canonicizer specified cannot be found or instanced
- */
- public Canonicizer addCanonicizer(String action, Document document)
- throws Exception {
- Canonicizer canonicizer = Canonicizers.getCanonicizer(action);
- return addCanonicizer(canonicizer, document);
- }
-
- /**
- * Add the Canonicizer specified to the document referenced.
- *
- * @param canonicizer - the canonicizer to add
- * @param document - the Document to add the canonicizer to
- * @return - a reference to the canonicizer added
- */
- public Canonicizer addCanonicizer(Canonicizer canonicizer, Document document) {
- document.addCanonicizer(canonicizer);
- logger.info("Adding Canonicizer "+canonicizer.displayName()+" to Document "+document.toString());
- return canonicizer;
- }
-
- public Canonicizer addCanonicizer(String action, EventDriver eventDriver) throws Exception {
- Canonicizer canonicizer = Canonicizers.getCanonicizer(action);
- return addCanonicizer(canonicizer, eventDriver);
- }
-
- public Canonicizer addCanonicizer(Canonicizer canonicizer, EventDriver eventDriver) {
- eventDriver.addCanonicizer(canonicizer);
- logger.info("Adding Canonicizer "+canonicizer.displayName()+" to EventDriver "+eventDriver.displayName());
- return canonicizer;
- }
-
- /**
- * Removes the first instance of the canoniciser corresponding to the action(displayName())
- * from the Document referenced.
- *
- * @param canonicizer - the canonicizer to be removed
- * @param document - a reference to the Document to remove the canonicizer from
- */
- public void removeCanonicizer(Canonicizer canonicizer, Document document) {
- document.removeCanonicizer(canonicizer);
- }
-
- public void removeCanonicizer(Canonicizer canonicizer, EventDriver eventDriver) {
- eventDriver.removeCanonicizer(canonicizer);
- }
-
- /**
- * Removes the first occurrence of the canonicizer corresponding to the action(displayName())
- * from every document
- *
- * @param canonicizer - the canonicizer to be removed
- */
- public void removeCanonicizer(Canonicizer canonicizer) {
- for (Document document : documents) {
- removeCanonicizer(canonicizer, document);
- }
- }
-
- /**
- * Removes the first occurrence of the canonicizer from every Document of the DocType docType
- *
- * @param canonicizer - the canonicizer to be removed
- * @param docType - the DocType to remove the canonicizer from
- */
- public void removeCanonicizer(Canonicizer canonicizer, Document.Type docType) {
- for (Document document : documents) {
- if (document.getDocType().equals(docType)) {
- removeCanonicizer(canonicizer, document);
- }
- }
- }
-
- /**
- * Removes all canonicizers from Documents with the DocType docType
- *
- * @param docType - the DocType to remove canonicizers from
- */
- public void removeAllCanonicizers(Document.Type docType) {
- for (Document document : documents) {
- document.clearCanonicizers();
- }
- }
-
- /**
- * Removes all canonicizers from All Documents loaded in the system
- */
- public void removeAllCanonicizers() {
- for (Document document : documents) {
- document.clearCanonicizers();
- }
- }
-
- /**
- * Add an Event Driver which will be used to
- * eventify(Generate a List of Events order in the sequence they are found in the document)
- * all of the documents
- * @param action - the identifier for the EventDriver to add (displayName())
- * @return - a reference to the added EventDriver
- * @throws Exception - If the action is not found or the EventDriver cannot be instanced
- */
- public EventDriver addEventDriver(String action) throws Exception {
- EventDriver eventDriver = EventDrivers.getEventDriver(action);
- return addEventDriver(eventDriver);
- }
-
- /**
- * Add an Event Driver which will be used to
- * eventify(Generate a List of Events order in the sequence they are found in the document)
- * all of the documents
- * @param eventDriver - the EventDriver to add
- * @return - a reference to the added EventDriver
- */
- public EventDriver addEventDriver(EventDriver eventDriver) {
- eventDrivers.add(eventDriver);
- logger.info("Adding EventDriver "+eventDriver.displayName());
- return eventDriver;
- }
-
- /**
- * Removes the Event Driver reference from the system
- * @param eventDriver - the EventDriver to be removed
- * @return - true if successful false if failure
- */
- public Boolean removeEventDriver(EventDriver eventDriver) {
- logger.info("Removing EventDriver "+eventDriver.displayName());
- return eventDrivers.remove(eventDriver);
- }
-
- /**
- * Removes all EventDrivers from the system
- */
- public void removeAllEventDrivers() {
- eventDrivers.clear();
- for (Document document : documents) {
- document.clearEventSets();
- }
- }
-
- /**
- * Gets a List of all EventDrivers currently loaded in the system
- * @return List of All loaded EventDrivers
- */
- public List getEventDrivers() {
- return eventDrivers;
- }
-
- /**
- * Add an Event Culler to the system
- *
- * @param action - unique identifier for the event culler to add (displayName())
- * @return - a reference to the added event culler
- * @throws Exception - if the EventCuller cannot be found or cannor be instanced
- */
- public EventCuller addEventCuller(String action) throws Exception {
- EventCuller eventCuller = EventCullers.getEventCuller(action);
- eventCullers.add(eventCuller);
- for(EventDriver eventDriver : eventDrivers) {
- addEventCuller(eventCuller, eventDriver);
- }
- return eventCuller;
- }
-
- public EventCuller addEventCuller(String action, EventDriver eventDriver) throws Exception {
- EventCuller eventCuller = EventCullers.getEventCuller(action);
- return addEventCuller(eventCuller, eventDriver);
- }
-
- public EventCuller addEventCuller(EventCuller eventCuller, EventDriver eventDriver) {
- eventDriver.addCuller(eventCuller);
- logger.info("Adding EventCuller "+eventCuller.displayName()+" to "+eventDriver.displayName());
- return eventCuller;
- }
-
- /**
- * Remove the supplied EventCuller from the system
- *
- * @param eventCuller - EventCuller to be removed
- * @return - true if success false if failure
- */
- public Boolean removeEventCuller(EventCuller eventCuller) {
- logger.info("Removing EventCuller "+eventCuller.displayName());
- eventCullers.remove(eventCuller);
- for(EventDriver eventDriver : eventDrivers){
- eventDriver.removeCuller(eventCuller);
- }
- return true;
- }
-
- /**
- * Removes all loaded EventCullers from the system
- */
- public void removeAllEventCullers() {
- eventCullers.clear();
- for(EventDriver eventDriver : eventDrivers){
- eventDriver.clearCullers();
- }
- }
-
- /**
- * Get a List of all EventCullers currently loaded in the system
- * @return List of EventCullers loaded
- */
- public List getEventCullers() {
- return eventCullers;
- }
-
- /**
- * Add an AnalysisDriver to the system as referenced by the action.
- *
- * @param action - the unique identifier for a AnalysisDriver (alternately a DistanceFunction)
- * @return - a reference to the generated Analysis Driver
- * @throws Exception - If the AnalysisDriver cannot be found or if it cannot be instanced
- */
- public AnalysisDriver addAnalysisDriver(String action) throws Exception {
- AnalysisDriver analysisDriver = AnalysisDrivers.getAnalysisDriver(action);
- return addAnalysisDriver(analysisDriver);
- }
-
- public AnalysisDriver addAnalysisDriver(AnalysisDriver analysisDriver) {
- logger.info("Adding AnalysisDriver "+analysisDriver.displayName());
- analysisDrivers.add(analysisDriver);
- return analysisDriver;
- }
-
- /**
- * Removed the passed AnalysisDriver from the system
- * @param analysisDriver - reference to the AnalysisDriver to be removed
- * @return True if success false if failure
- */
- public Boolean removeAnalysisDriver(AnalysisDriver analysisDriver) {
- logger.info("Removing AnalysisDriver "+analysisDriver.displayName());
- return analysisDrivers.remove(analysisDriver);
- }
-
- /**
- * Removes all AnalysisDrivers from the system
- */
- public void removeAllAnalysisDrivers() {
- analysisDrivers.clear();
- }
-
- /**
- * Adds a DistanceFunction to the AnalysisDriver supplied.
- * Only AnalysisDrivers that extend the NeighborAnalysisDriver can be used
- *
- * @param action - unique identifier for the DistanceFunction you want to add
- * @param analysisDriver - a reference to the AnalysisDriver you want the distance added to
- * @return - a reference to the generated DistanceFunction
- * @throws Exception - if the AnalysisDriver does not extend NeighborAnalysisDriver or if the DistanceFunction cannot be found the DistanceFunction cannot be instanced
- */
- public DistanceFunction addDistanceFunction(String action,
- AnalysisDriver analysisDriver) throws Exception {
- DistanceFunction distanceFunction = DistanceFunctions
- .getDistanceFunction(action);
- return addDistanceFunction(distanceFunction, analysisDriver);
- }
-
- /**
- * Adds a DistanceFunction to the AnalysisDriver supplied.
- * Only AnalysisDrivers that extend the NeighborAnalysisDriver can be used
- *
- * @param distanceFunction - the DistanceFunction you want to add
- * @param analysisDriver - a reference to the AnalysisDriver you want the distance added to
- * @return - a reference to the generated DistanceFunction
- */
- public DistanceFunction addDistanceFunction(DistanceFunction distanceFunction, AnalysisDriver analysisDriver) {
- ((NeighborAnalysisDriver) analysisDriver).setDistance(distanceFunction);
- return distanceFunction;
- }
-
- /**
- * @param action - unique identifier for the AnalysisDriver you want to add
- * @param analysisDriver - a reference to the NonDistanceDependentAnalysisDriver you want
- * the other driver added to
- */
- public void addAnalysisDriverAsParamToOther(String action, NonDistanceDependentAnalysisDriver analysisDriver)
- throws Exception {
- analysisDriver.setAnalysisDriver(AnalysisDrivers.getAnalysisDriver(action));
- }
-
- /**
- * Get a List of All AnalysisDrivers currently loaded on the system
- * @return List of All AnalysisDrivers
- */
- public List getAnalysisDrivers() {
- return analysisDrivers;
- }
-
- /**
- * Get the current Language JGAAP is set to be working on
- * @return
- */
- public Language getLanguage(){
- return language;
- }
-
- /**
- * Set the Language that JGAAP will operate in.
- * This restricts what methods are available, changes the charset that is expected when reading files, and will add any pre-processing that is needed
- * @param action - the Language to operate under
- * @return - a Reference to the language object selected
- * @throws Exception - if the language cannot be found or cannot be instanced
- */
- public Language setLanguage(String action) throws Exception {
- language = Languages.getLanguage(action);
- return language;
- }
-
- /**
- * Pipelines the independent aspects of loading and processing a document into separate threads
- *
- * Load the text from disk or the web
- * Take into account any special treatment based on the language currently selected
- * Place the text into canonical form using the Canonicizers
- * Use the EventDrivers to transform the text into EventSets
- *
- * @throws Exception
- */
- private void loadCanonicizeEventify() throws Exception{
- List> documentsProcessing = new ArrayList>(documents.size());
- for(final Document document : documents){
- Callable work = new Callable() {
- @Override
- public Document call() throws Exception {
- try {
- document.setLanguage(language);
- document.load();
- document.processCanonicizers();
- for (EventDriver eventDriver : eventDrivers) {
- char[] text = document.getText();
- for(Canonicizer canonicizer : eventDriver.getCanonicizers()){
- text = canonicizer.process(text);
- }
- try{
- document.addEventSet(eventDriver,eventDriver.createEventSet(text));
- } catch (EventGenerationException e) {
- logger.error("Could not Eventify with "+eventDriver.displayName()+" on File:"+document.getFilePath()+" Title:"+document.getTitle(),e);
- throw new Exception("Could not Eventify with "+eventDriver.displayName()+" on File:"+document.getFilePath()+" Title:"+document.getTitle(),e);
- }
- }
- document.setText("");
- } catch (LanguageParsingException e) {
- logger.fatal("Could not Parse Language: "+language.displayName()+" on File:"+document.getFilePath()+" Title:"+document.getTitle(),e);
- document.failed();
- } catch (CanonicizationException e) {
- logger.fatal("Could not Canonicize File: "+document.getFilePath()+" Title:"+document.getTitle(),e);
- document.failed();
- } catch (Exception e) {
- logger.fatal("Could not load File: "+document.getFilePath()+" Title:"+document.getTitle(),e);
- document.failed();
- }
- return document;
- }
- };
- documentsProcessing.add(executor.submit(work));
- }
-
- while(true){
- if(documentsProcessing.size()==0){
- break;
- }else {
- Iterator> documentIterator = documentsProcessing.iterator();
- while(documentIterator.hasNext()){
- Future futureDocument = documentIterator.next();
- if(futureDocument.isDone()){
- Document document = futureDocument.get();
- if(document.hasFailed()){
- throw new Exception("One or more documents could not be read / parsed / canonicized Experiment Failed");
- }
- logger.info("Document: "+document.getTitle()+" has finished processing.");
- documentIterator.remove();
- }
- }
- }
- }
- }
-
- /**
- * Events are culled from EventSets across all Documents on a per EventDriver basis
- * @throws EventCullingException
- * @throws ExecutionException
- * @throws InterruptedException
- */
- private void cull() throws EventCullingException, InterruptedException, ExecutionException {
- List> futureEventDrivers = new ArrayList>();
- for (EventDriver eventDriver : eventDrivers) {
- if (!eventDriver.getEventCullers().isEmpty()) {
- futureEventDrivers.add(executor.submit(new Culling(eventDriver)));
- }
- }
- while(futureEventDrivers.size() != 0) {
- Iterator> iterator = futureEventDrivers.iterator();
- while(iterator.hasNext()) {
- Future futureEventDriver = iterator.next();
- if(futureEventDriver.isDone()){
- EventDriver eventDriver = futureEventDriver.get();
- logger.info("Finished Culling "+eventDriver.displayName());
- iterator.remove();
- }
- }
- }
- }
-
- /**
- * All loaded AnalysisDrivers are run over All EventSets comparing the Unknown(sample) to the Known(training) Documents.
- */
- private void analyze() throws AnalyzeException {
- List knownDocuments = new ArrayList();
- List unknownDocuments = new ArrayList();
- for (Document document : documents) {
- if (document.isAuthorKnown()) {
- knownDocuments.add(document);
- } else {
- unknownDocuments.add(document);
- }
- }
- for (AnalysisDriver analysisDriver : analysisDrivers) {
- logger.info("Training " + analysisDriver.displayName());
- analysisDriver.train(knownDocuments);
- logger.info("Finished Training "+analysisDriver.displayName());
- List> futureDocuments = new ArrayList>();
- if (analysisDriver instanceof ValidationDriver ||
- analysisDriver instanceof LeaveOneOutNoDistanceDriver) {
- for (Document knownDocument : knownDocuments) {
- futureDocuments.add(executor.submit(new AnalysisWorker(knownDocument, analysisDriver)));
- }
- } else if (analysisDriver instanceof WEKAAnalysisDriver){
- for (Document unknownDocument : unknownDocuments){
- logger.info("Begining Analyzing: " + unknownDocument.toString());
- unknownDocument.addResult(analysisDriver, analysisDriver.analyze(unknownDocument));
- logger.info("Finished Analyzing: "+unknownDocument.toString());
- }
- } else {
- for (Document unknownDocument : unknownDocuments) {
- futureDocuments.add(executor.submit(new AnalysisWorker(unknownDocument, analysisDriver)));
- }
- }
- //await analysis to finish
- while(futureDocuments.size() != 0){
- Iterator> iterator = futureDocuments.iterator();
- while(iterator.hasNext()) {
- Future futureDocument = iterator.next();
- if(futureDocument.isDone()) {
- iterator.remove();
- }
- }
- }
- logger.info("Finished Analysis with "+analysisDriver.displayName());
- }
- }
-
- /**
- * Performs the canonicize eventify cull and analyze methods since a strict order has to be enforced when using them
- * @throws Exception
- */
- public void execute() throws Exception {
- clearData();
- executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
- loadCanonicizeEventify();
- cull();
- analyze();
- executor.shutdown();
- executor.awaitTermination(5, TimeUnit.SECONDS);
- }
-
- /**
- * Removes canonicizors from all documents
- */
- public void clearCanonicizers() {
- for(Document document : documents){
- document.clearCanonicizers();
- }
- }
-
- /**
- * Removes all Generated data from a run but leaves all settings untouched
- */
- public void clearData() {
- for(Document document : documents){
- document.clearEventSets();
- document.clearResults();
- }
- }
-
- private class Culling implements Callable {
- private EventDriver eventDriver;
- private ExecutorService cullingExecutor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
-
- Culling(EventDriver eventDriver) {
- this.eventDriver = eventDriver;
- }
-
- @Override
- public EventDriver call() throws Exception {
- List eventSets = new ArrayList();
- for(Document document : documents){
- eventSets.add(document.getEventSet(eventDriver));
- }
- for(EventCuller culler : eventDriver.getEventCullers()) {
- culler.init(eventSets);
- List> futureEventSets = new ArrayList>(eventSets.size());
- for(EventSet eventSet : eventSets) {
- futureEventSets.add(cullingExecutor.submit(new CullerWorker(eventSet, culler)));
- }
- eventSets.clear();
- for(Future futureEventSet : futureEventSets) {
- eventSets.add(futureEventSet.get());
- }
- }
- cullingExecutor.shutdown();
- for(int i = 0; i < documents.size(); i++) {
- documents.get(i).addEventSet(eventDriver, eventSets.get(i));
- }
- return eventDriver;
- }
- }
-
- private class CullerWorker implements Callable {
- private EventSet eventSet;
- private EventCuller culler;
-
- CullerWorker(EventSet eventSet, EventCuller culler) {
- this.eventSet = eventSet;
- this.culler = culler;
- }
-
- public EventSet call() {
- return culler.cull(eventSet);
- }
- }
-
- private class AnalysisWorker implements Callable {
- private Document document;
- private AnalysisDriver analysisDriver;
-
- AnalysisWorker(Document document, AnalysisDriver analysisDriver){
- this.document = document;
- this.analysisDriver = analysisDriver;
- }
-
- @Override
- public Document call() throws Exception {
- logger.info("Begining Analyzing: " + document.toString());
- document.addResult(analysisDriver, analysisDriver.analyze(document));
- logger.info("Finished Analyzing: "+document.toString());
- return document;
- }
- }
-}
+/*
+ * JGAAP -- a graphical program for stylometric authorship attribution
+ * Copyright (C) 2009,2011 by Patrick Juola
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package com.jgaap.backend;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.log4j.Logger;
+
+import com.jgaap.classifiers.LeaveOneOutNoDistanceDriver;
+import com.jgaap.generics.AnalysisDriver;
+import com.jgaap.generics.AnalyzeException;
+import com.jgaap.generics.CanonicizationException;
+import com.jgaap.generics.Canonicizer;
+import com.jgaap.generics.DistanceFunction;
+import com.jgaap.generics.EventCuller;
+import com.jgaap.generics.EventCullingException;
+import com.jgaap.generics.EventDriver;
+import com.jgaap.generics.EventGenerationException;
+import com.jgaap.generics.Language;
+import com.jgaap.generics.LanguageParsingException;
+import com.jgaap.generics.NeighborAnalysisDriver;
+import com.jgaap.generics.NonDistanceDependentAnalysisDriver;
+import com.jgaap.generics.ValidationDriver;
+import com.jgaap.generics.WEKAAnalysisDriver;
+import com.jgaap.languages.English;
+import com.jgaap.util.Document;
+import com.jgaap.util.EventSet;
+
+/**
+ *
+ * This class provides a simple interface into jgaap for use in
+ * other software packages and for development of any human interfaces.
+ *
+ * Instructions for using the JGAAP API:
+ *
+ * First add documents both known and unknown
+ *
+ * All other settings can be performed in any order which are setLanguage, addCanonicizer,
+ * addEventDriver, addEventCuller, addAnalysisDriver, addDistanceFunction
+ * Note: of the settings only one EventDriver and one AnalysisDriver are required to run an experiment
+ *
+ * The execute method is then used to start the experiment running
+ *
+ * Results are placed in the unknown documents; to access them simply use the getUnknownDocuments method in the API
+ * The results can be retrieved as a List<Pair<String, Double>> this is a sorted list
+ * from most likely to least likely author followed by a score generated based on your settings using the getRawResult method
+ * You can also get a Map of Maps of the raw results (Map>>) with the getRawResults method
+ * They can also be retrieved as a string using either the getFormattedResult or getResult methods.
+ *
+ * For examples of how to use the API class see the com.jgaap.ui package for a GUI example
+ * or the com.jgaap.backend.CLI class for a command line example
+ *
+ * @author Michael Ryan
+ * @since 5.0.0
+ */
+public class API {
+
+ static Logger logger = Logger.getLogger(API.class);
+
+ // Every document registered with this instance, known (tagged) and unknown alike.
+ private final List<Document> documents;
+ // Language handed to each document when an experiment is executed; English until setLanguage is called.
+ private Language language;
+ // EventDrivers used to turn each document's text into EventSets.
+ private final List<EventDriver> eventDrivers;
+ // EventCullers registered through addEventCuller(String).
+ private final List<EventCuller> eventCullers;
+ // AnalysisDrivers run, in insertion order, by analyze().
+ private final List<AnalysisDriver> analysisDrivers;
+ // Worker pool; assigned at the start of every execute() call.
+ private ExecutorService executor;
+
+ private static final API INSTANCE = new API();
+
+ /**
+  * Creates an API with no documents, event drivers, event cullers or
+  * analysis drivers, and with English as the working language.
+  * Instances are handed out by getInstance() and getPrivateInstance().
+  */
+ private API() {
+     documents = new ArrayList<Document>();
+     language = new English();
+     eventDrivers = new ArrayList<EventDriver>();
+     eventCullers = new ArrayList<EventCuller>();
+     analysisDrivers = new ArrayList<AnalysisDriver>();
+ }
+
+ /**
+ * This allows a singleton of the api to be used in the gui
+ * or any program that needs to access a single copy of JGAAP
+ * from multiple classes
+ *
+ * @return a reference to the singleton API
+ */
+ public static API getInstance(){
+ return INSTANCE;
+ }
+
+ /**
+ * This is a unique instance of the api to be used when running
+ * bulk experiments and you want to reset everything or if you
+ * want to thread running more than one experiment at a time
+ * as in the class com.jgaap.backend.ExperimentEngine
+ *
+ * @return a unique API instance
+ */
+ public static API getPrivateInstance(){
+ return new API();
+ }
+
+ /**
+ *
+ * This allows for the addition of documents to the system.
+ * Both Training (known) and Sample (unknown) documents must be provided before running an experiment.
+ * Training Documents are added by providing an author(tag) for them.
+ * Sample documents are added when no author(tag) is given.
+ *
+ * @param filepath - the system file path or URL to a document
+ * @param author - the author of this document or the tag being applied to this document, if null or the empty string this document is considered unknown and is one of those classified
+ * @param title - Some means of identifying the document, if null or the empty string are provided a title will be generated from the file name
+ * @return - a reference to the document generated
+ * @throws Exception - if there is a problem loading the document from file web or parsing file format
+ */
+ public Document addDocument(String filepath, String author, String title)
+ throws Exception {
+ Document document = new Document(filepath, author, title);
+ return addDocument(document);
+ }
+
+ /**
+ * Adds a previously generated document to the jgaap system.
+ *
+ * @param document - a file that has already been loaded as a Document
+ * @return - a reference to the document generated
+ */
+ public Document addDocument(Document document) {
+ documents.add(document);
+ logger.info("Adding Document "+document.toString());
+ return document;
+ }
+
+ /**
+ * Removes a document from the system.
+ *
+ * @param document - a reference to the document that is to be removed
+ * @return - true on success false on failure
+ */
+ public Boolean removeDocument(Document document) {
+ logger.info("Removing Document "+document.toString());
+ return documents.remove(document);
+ }
+
+ /**
+ * Removes all documents loaded into the system.
+ */
+ public void removeAllDocuments() {
+ logger.info("Removing all Documents");
+ documents.clear();
+ }
+
+ /**
+ * Get a List of all Documents currently loaded into jgaap
+ *
+ * @return - a List of Documents loaded into the system
+ */
+ public List getDocuments() {
+ return documents;
+ }
+
+ /**
+ * Get a List of all currently loaded Documents that do not have an author(tag)
+ *
+ * @return List of Documents without authors
+ */
+ public List getUnknownDocuments() {
+ List unknownDocuments = new ArrayList();
+ for (Document document : documents) {
+ if (!document.isAuthorKnown()) {
+ unknownDocuments.add(document);
+ }
+ }
+ return unknownDocuments;
+ }
+
+ /**
+ * Get a List of Documents currently loaded into the system that have a author(tag)
+ *
+ * @return List of Documents with authors
+ */
+ public List getKnownDocuments() {
+ List knownDocuments = new ArrayList();
+ for (Document document : documents) {
+ if (document.isAuthorKnown()) {
+ knownDocuments.add(document);
+ }
+ }
+ return knownDocuments;
+ }
+
+ /**
+  * Get a List of Documents that all have the same author(tag).
+  * Authors are compared ignoring case, and only documents whose author is known are considered.
+  *
+  * @param author - the author(tag) to select documents on; null matches no documents
+  * @return - List of Documents limited by the author provided (empty when nothing matches)
+  */
+ public List<Document> getDocumentsByAuthor(String author) {
+     List<Document> authorDocuments = new ArrayList<Document>();
+     // A null author cannot match any tagged document; answer with an empty list
+     // instead of failing with a NullPointerException on the comparison below.
+     if (author == null) {
+         return authorDocuments;
+     }
+     for (Document document : documents) {
+         if (document.isAuthorKnown() && author.equalsIgnoreCase(document.getAuthor())) {
+             authorDocuments.add(document);
+         }
+     }
+     return authorDocuments;
+ }
+
+ /**
+ * Get a List of all unique authors(tags) applied to Known(Training) Documents
+ *
+ * @return List of authors
+ */
+ public List getAuthors() {
+ Set authors = new HashSet();
+ for (Document document : documents) {
+ if (document.isAuthorKnown()) {
+ authors.add(document.getAuthor());
+ }
+ }
+ List authorsList = new ArrayList(authors);
+ Collections.sort(authorsList);
+ return authorsList;
+ }
+
+ /**
+ * Loads the documents from the file system
+ * @throws Exception - if loading any of the documents fails
+ */
+ public void loadDocuments() throws Exception{
+ for(Document document : documents){
+ document.load();
+ }
+ }
+
+ /**
+ * Adds the specified canonicizer to all documents currently loaded in the system.
+ *
+ * @param action - the unique string name representing a canonicizer (displayName())
+ * @return - a reference to the canonicizer added
+ * @throws Exception - if the canonicizer specified cannot be found or instanced
+ */
+ public Canonicizer addCanonicizer(String action) throws Exception {
+ Canonicizer canonicizer = Canonicizers.getCanonicizer(action);
+ for (Document document : documents) {
+ addCanonicizer(canonicizer, document);
+ }
+ return canonicizer;
+ }
+
+ /**
+ * Adds the specified canonicizer to all Documents that have the DocType docType.
+ *
+ * @param action - the unique string name representing a canonicizer (displayName())
+ * @param docType - The DocType this canonicizer is restricted to
+ * @return - a reference to the canonicizer added
+ * @throws Exception - if the canonicizer specified cannot be found or instanced
+ */
+ public Canonicizer addCanonicizer(String action, Document.Type docType) throws Exception {
+ Canonicizer canonicizer = Canonicizers.getCanonicizer(action);
+ for (Document document : documents) {
+ if (document.getDocType().equals(docType)) {
+ addCanonicizer(canonicizer, document);
+ }
+ }
+ return canonicizer;
+ }
+
+ /**
+ * Add the Canonicizer specified to the document referenced.
+ *
+ * @param action - the unique string name representing a canonicizer (displayName())
+ * @param document - the Document to add the canonicizer to
+ * @return - a reference to the canonicizer added
+ * @throws Exception - if the canonicizer specified cannot be found or instanced
+ */
+ public Canonicizer addCanonicizer(String action, Document document)
+ throws Exception {
+ Canonicizer canonicizer = Canonicizers.getCanonicizer(action);
+ return addCanonicizer(canonicizer, document);
+ }
+
+ /**
+ * Add the Canonicizer specified to the document referenced.
+ *
+ * @param canonicizer - the canonicizer to add
+ * @param document - the Document to add the canonicizer to
+ * @return - a reference to the canonicizer added
+ */
+ public Canonicizer addCanonicizer(Canonicizer canonicizer, Document document) {
+ document.addCanonicizer(canonicizer);
+ logger.info("Adding Canonicizer "+canonicizer.displayName()+" to Document "+document.toString());
+ return canonicizer;
+ }
+
+ /**
+  * Looks up the canonicizer named by action and attaches it to the EventDriver referenced.
+  *
+  * @param action - the unique string name representing a canonicizer (displayName())
+  * @param eventDriver - the EventDriver to add the canonicizer to
+  * @return - a reference to the canonicizer added
+  * @throws Exception - if the canonicizer lookup for action fails
+  */
+ public Canonicizer addCanonicizer(String action, EventDriver eventDriver) throws Exception {
+     return addCanonicizer(Canonicizers.getCanonicizer(action), eventDriver);
+ }
+
+ /**
+  * Attaches the canonicizer supplied to the EventDriver referenced and logs the addition.
+  *
+  * @param canonicizer - the canonicizer to add
+  * @param eventDriver - the EventDriver to add the canonicizer to
+  * @return - a reference to the canonicizer added
+  */
+ public Canonicizer addCanonicizer(Canonicizer canonicizer, EventDriver eventDriver) {
+     eventDriver.addCanonicizer(canonicizer);
+     String message = "Adding Canonicizer "+canonicizer.displayName()+" to EventDriver "+eventDriver.displayName();
+     logger.info(message);
+     return canonicizer;
+ }
+
+ /**
+ * Removes the first instance of the canonicizer corresponding to the action(displayName())
+ * from the Document referenced.
+ *
+ * @param canonicizer - the canonicizer to be removed
+ * @param document - a reference to the Document to remove the canonicizer from
+ */
+ public void removeCanonicizer(Canonicizer canonicizer, Document document) {
+ document.removeCanonicizer(canonicizer);
+ }
+
+ public void removeCanonicizer(Canonicizer canonicizer, EventDriver eventDriver) {
+ eventDriver.removeCanonicizer(canonicizer);
+ }
+
+ /**
+ * Removes the first occurrence of the canonicizer corresponding to the action(displayName())
+ * from every document
+ *
+ * @param canonicizer - the canonicizer to be removed
+ */
+ public void removeCanonicizer(Canonicizer canonicizer) {
+ for (Document document : documents) {
+ removeCanonicizer(canonicizer, document);
+ }
+ }
+
+ /**
+ * Removes the first occurrence of the canonicizer from every Document of the DocType docType
+ *
+ * @param canonicizer - the canonicizer to be removed
+ * @param docType - the DocType to remove the canonicizer from
+ */
+ public void removeCanonicizer(Canonicizer canonicizer, Document.Type docType) {
+ for (Document document : documents) {
+ if (document.getDocType().equals(docType)) {
+ removeCanonicizer(canonicizer, document);
+ }
+ }
+ }
+
+ /**
+  * Removes all canonicizers from Documents with the DocType docType.
+  * Documents of any other DocType keep their canonicizers.
+  *
+  * @param docType - the DocType to remove canonicizers from
+  */
+ public void removeAllCanonicizers(Document.Type docType) {
+     for (Document document : documents) {
+         // Restrict the clearing to the requested type; without this check every
+         // document lost its canonicizers regardless of docType.
+         if (document.getDocType().equals(docType)) {
+             document.clearCanonicizers();
+         }
+     }
+ }
+
+ /**
+ * Removes all canonicizers from All Documents loaded in the system
+ */
+ public void removeAllCanonicizers() {
+ for (Document document : documents) {
+ document.clearCanonicizers();
+ }
+ }
+
+ /**
+ * Add an Event Driver which will be used to
+ * eventify(Generate a List of Events order in the sequence they are found in the document)
+ * all of the documents
+ * @param action - the identifier for the EventDriver to add (displayName())
+ * @return - a reference to the added EventDriver
+ * @throws Exception - If the action is not found or the EventDriver cannot be instanced
+ */
+ public EventDriver addEventDriver(String action) throws Exception {
+ EventDriver eventDriver = EventDrivers.getEventDriver(action);
+ return addEventDriver(eventDriver);
+ }
+
+ /**
+ * Add an Event Driver which will be used to
+ * eventify(Generate a List of Events order in the sequence they are found in the document)
+ * all of the documents
+ * @param eventDriver - the EventDriver to add
+ * @return - a reference to the added EventDriver
+ */
+ public EventDriver addEventDriver(EventDriver eventDriver) {
+ eventDrivers.add(eventDriver);
+ logger.info("Adding EventDriver "+eventDriver.displayName());
+ return eventDriver;
+ }
+
+ /**
+ * Removes the Event Driver reference from the system
+ * @param eventDriver - the EventDriver to be removed
+ * @return - true if successful false if failure
+ */
+ public Boolean removeEventDriver(EventDriver eventDriver) {
+ logger.info("Removing EventDriver "+eventDriver.displayName());
+ return eventDrivers.remove(eventDriver);
+ }
+
+ /**
+ * Removes all EventDrivers from the system
+ */
+ public void removeAllEventDrivers() {
+ eventDrivers.clear();
+ for (Document document : documents) {
+ document.clearEventSets();
+ }
+ }
+
+ /**
+ * Gets a List of all EventDrivers currently loaded in the system
+ * @return List of All loaded EventDrivers
+ */
+ public List getEventDrivers() {
+ return eventDrivers;
+ }
+
+ /**
+ * Add an Event Culler to the system
+ *
+ * @param action - unique identifier for the event culler to add (displayName())
+ * @return - a reference to the added event culler
+ * @throws Exception - if the EventCuller cannot be found or cannot be instanced
+ */
+ public EventCuller addEventCuller(String action) throws Exception {
+ EventCuller eventCuller = EventCullers.getEventCuller(action);
+ eventCullers.add(eventCuller);
+ for(EventDriver eventDriver : eventDrivers) {
+ addEventCuller(eventCuller, eventDriver);
+ }
+ return eventCuller;
+ }
+
+ /**
+  * Looks up the EventCuller named by action and attaches it to the EventDriver referenced.
+  *
+  * @param action - unique identifier for the event culler to add (displayName())
+  * @param eventDriver - the EventDriver the culler is added to
+  * @return - a reference to the added event culler
+  * @throws Exception - if the EventCuller lookup for action fails
+  */
+ public EventCuller addEventCuller(String action, EventDriver eventDriver) throws Exception {
+     return addEventCuller(EventCullers.getEventCuller(action), eventDriver);
+ }
+
+ /**
+  * Attaches the EventCuller supplied to the EventDriver referenced and logs the addition.
+  *
+  * @param eventCuller - the EventCuller to add
+  * @param eventDriver - the EventDriver the culler is added to
+  * @return - a reference to the added event culler
+  */
+ public EventCuller addEventCuller(EventCuller eventCuller, EventDriver eventDriver) {
+     eventDriver.addCuller(eventCuller);
+     String message = "Adding EventCuller "+eventCuller.displayName()+" to "+eventDriver.displayName();
+     logger.info(message);
+     return eventCuller;
+ }
+
+ /**
+ * Remove the supplied EventCuller from the system
+ *
+ * @param eventCuller - EventCuller to be removed
+ * @return - true if success false if failure
+ */
+ public Boolean removeEventCuller(EventCuller eventCuller) {
+ logger.info("Removing EventCuller "+eventCuller.displayName());
+ eventCullers.remove(eventCuller);
+ for(EventDriver eventDriver : eventDrivers){
+ eventDriver.removeCuller(eventCuller);
+ }
+ return true;
+ }
+
+ /**
+ * Removes all loaded EventCullers from the system
+ */
+ public void removeAllEventCullers() {
+ eventCullers.clear();
+ for(EventDriver eventDriver : eventDrivers){
+ eventDriver.clearCullers();
+ }
+ }
+
+ /**
+ * Get a List of all EventCullers currently loaded in the system
+ * @return List of EventCullers loaded
+ */
+ public List getEventCullers() {
+ return eventCullers;
+ }
+
+ /**
+ * Add an AnalysisDriver to the system as referenced by the action.
+ *
+ * @param action - the unique identifier for a AnalysisDriver (alternately a DistanceFunction)
+ * @return - a reference to the generated Analysis Driver
+ * @throws Exception - If the AnalysisDriver cannot be found or if it cannot be instanced
+ */
+ public AnalysisDriver addAnalysisDriver(String action) throws Exception {
+ AnalysisDriver analysisDriver = AnalysisDrivers.getAnalysisDriver(action);
+ return addAnalysisDriver(analysisDriver);
+ }
+
+ /**
+  * Registers the AnalysisDriver supplied with the system and logs the addition.
+  *
+  * @param analysisDriver - the AnalysisDriver to add
+  * @return - a reference to the AnalysisDriver added
+  */
+ public AnalysisDriver addAnalysisDriver(AnalysisDriver analysisDriver) {
+     String message = "Adding AnalysisDriver "+analysisDriver.displayName();
+     logger.info(message);
+     analysisDrivers.add(analysisDriver);
+     return analysisDriver;
+ }
+
+ /**
+ * Removed the passed AnalysisDriver from the system
+ * @param analysisDriver - reference to the AnalysisDriver to be removed
+ * @return True if success false if failure
+ */
+ public Boolean removeAnalysisDriver(AnalysisDriver analysisDriver) {
+ logger.info("Removing AnalysisDriver "+analysisDriver.displayName());
+ return analysisDrivers.remove(analysisDriver);
+ }
+
+ /**
+ * Removes all AnalysisDrivers from the system
+ */
+ public void removeAllAnalysisDrivers() {
+ analysisDrivers.clear();
+ }
+
+ /**
+ * Adds a DistanceFunction to the AnalysisDriver supplied.
+ * Only AnalysisDrivers that extend the NeighborAnalysisDriver can be used
+ *
+ * @param action - unique identifier for the DistanceFunction you want to add
+ * @param analysisDriver - a reference to the AnalysisDriver you want the distance added to
+ * @return - a reference to the generated DistanceFunction
+ * @throws Exception - if the AnalysisDriver does not extend NeighborAnalysisDriver or if the DistanceFunction cannot be found the DistanceFunction cannot be instanced
+ */
+ public DistanceFunction addDistanceFunction(String action,
+ AnalysisDriver analysisDriver) throws Exception {
+ DistanceFunction distanceFunction = DistanceFunctions
+ .getDistanceFunction(action);
+ return addDistanceFunction(distanceFunction, analysisDriver);
+ }
+
+ /**
+ * Adds a DistanceFunction to the AnalysisDriver supplied.
+ * Only AnalysisDrivers that extend the NeighborAnalysisDriver can be used
+ *
+ * @param distanceFunction - the DistanceFunction you want to add
+ * @param analysisDriver - a reference to the AnalysisDriver you want the distance added to
+ * @return - a reference to the generated DistanceFunction
+ */
+ public DistanceFunction addDistanceFunction(DistanceFunction distanceFunction, AnalysisDriver analysisDriver) {
+ ((NeighborAnalysisDriver) analysisDriver).setDistance(distanceFunction);
+ return distanceFunction;
+ }
+
+ /**
+ * @param action - unique identifier for the AnalysisDriver you want to add
+ * @param analysisDriver - a reference to the NonDistanceDependentAnalysisDriver you want
+ * the other driver added to
+ */
+ public void addAnalysisDriverAsParamToOther(String action, NonDistanceDependentAnalysisDriver analysisDriver)
+ throws Exception {
+ analysisDriver.setAnalysisDriver(AnalysisDrivers.getAnalysisDriver(action));
+ }
+
+ /**
+ * Get a List of All AnalysisDrivers currently loaded on the system
+ * @return List of All AnalysisDrivers
+ */
+ public List getAnalysisDrivers() {
+ return analysisDrivers;
+ }
+
+ /**
+ * Get the current Language JGAAP is set to be working on
+ * @return - the Language currently selected
+ */
+ public Language getLanguage(){
+ return language;
+ }
+
+ /**
+ * Set the Language that JGAAP will operate in.
+ * This restricts what methods are available, changes the charset that is expected when reading files, and will add any pre-processing that is needed
+ * @param action - the Language to operate under
+ * @return - a Reference to the language object selected
+ * @throws Exception - if the language cannot be found or cannot be instanced
+ */
+ public Language setLanguage(String action) throws Exception {
+ language = Languages.getLanguage(action);
+ return language;
+ }
+
+ /**
+  * Pipelines the independent aspects of loading and processing a document into separate threads.
+  *
+  * One task per document is submitted to the executor. Each task:
+  *   sets the currently selected language on the document,
+  *   loads the text,
+  *   runs the document's own Canonicizers,
+  *   then, for every EventDriver, applies that driver's Canonicizers to the text
+  *   and stores the EventSet the driver creates from it,
+  *   and finally replaces the document text with the empty string.
+  * Any failure inside a task is logged and recorded on the document via failed().
+  *
+  * The calling thread then blocks on each task in submission order.
+  *
+  * @throws Exception - if any document reports hasFailed(), or if waiting on a task fails
+  */
+ private void loadCanonicizeEventify() throws Exception{
+     List<Future<Document>> documentsProcessing = new ArrayList<Future<Document>>(documents.size());
+     for(final Document document : documents){
+         Callable<Document> work = new Callable<Document>() {
+             @Override
+             public Document call() throws Exception {
+                 try {
+                     document.setLanguage(language);
+                     document.load();
+                     document.processCanonicizers();
+                     for (EventDriver eventDriver : eventDrivers) {
+                         // Driver-specific canonicizers work on a per-driver view of the text.
+                         char[] text = document.getText();
+                         for(Canonicizer canonicizer : eventDriver.getCanonicizers()){
+                             text = canonicizer.process(text);
+                         }
+                         try{
+                             document.addEventSet(eventDriver,eventDriver.createEventSet(text));
+                         } catch (EventGenerationException e) {
+                             String message = "Could not Eventify with "+eventDriver.displayName()+" on File:"+document.getFilePath()+" Title:"+document.getTitle();
+                             logger.error(message,e);
+                             // Rethrown so the generic handler below marks the document as failed.
+                             throw new Exception(message,e);
+                         }
+                     }
+                     document.setText("");
+                 } catch (LanguageParsingException e) {
+                     logger.fatal("Could not Parse Language: "+language.displayName()+" on File:"+document.getFilePath()+" Title:"+document.getTitle(),e);
+                     document.failed();
+                 } catch (CanonicizationException e) {
+                     logger.fatal("Could not Canonicize File: "+document.getFilePath()+" Title:"+document.getTitle(),e);
+                     document.failed();
+                 } catch (Exception e) {
+                     logger.fatal("Could not load File: "+document.getFilePath()+" Title:"+document.getTitle(),e);
+                     document.failed();
+                 }
+                 return document;
+             }
+         };
+         documentsProcessing.add(executor.submit(work));
+     }
+
+     // Future.get() parks the calling thread until the task completes, so no CPU
+     // is burned polling isDone() while the workers are busy.
+     for (Future<Document> futureDocument : documentsProcessing) {
+         Document document = futureDocument.get();
+         if(document.hasFailed()){
+             throw new Exception("One or more documents could not be read / parsed / canonicized Experiment Failed");
+         }
+         logger.info("Document: "+document.getTitle()+" has finished processing.");
+     }
+ }
+
+ /**
+  * Events are culled from EventSets across all Documents on a per EventDriver basis.
+  * A Culling task is submitted for every EventDriver that has at least one EventCuller;
+  * the calling thread then blocks until each of those tasks has finished.
+  *
+  * @throws EventCullingException
+  * @throws ExecutionException - if a Culling task terminated with an exception
+  * @throws InterruptedException - if the calling thread is interrupted while waiting
+  */
+ private void cull() throws EventCullingException, InterruptedException, ExecutionException {
+     List<Future<EventDriver>> futureEventDrivers = new ArrayList<Future<EventDriver>>();
+     for (EventDriver eventDriver : eventDrivers) {
+         if (!eventDriver.getEventCullers().isEmpty()) {
+             futureEventDrivers.add(executor.submit(new Culling(eventDriver)));
+         }
+     }
+     // Block on each task instead of spinning on isDone().
+     for (Future<EventDriver> futureEventDriver : futureEventDrivers) {
+         EventDriver eventDriver = futureEventDriver.get();
+         logger.info("Finished Culling "+eventDriver.displayName());
+     }
+ }
+
+ /**
+  * All loaded AnalysisDrivers are run over All EventSets comparing the Unknown(sample) to the Known(training) Documents.
+  *
+  * How a driver is run depends on its type:
+  *   ValidationDriver - leave-one-out over the known documents (see leaveOneOutAnalyze);
+  *   LeaveOneOutNoDistanceDriver - leave-one-out as above, unless its displayName contains
+  *   "Weighted Voting", in which case it is trained once on all known documents and then
+  *   asked to analyze each known document in the calling thread;
+  *   WEKAAnalysisDriver - trained once, then each unknown document is analyzed in the calling thread;
+  *   anything else - trained once, then each unknown document is analyzed on the executor.
+  *
+  * @throws AnalyzeException - propagated from the drivers' train / analyze calls made in the calling thread
+  */
+ private void analyze() throws AnalyzeException {
+     List<Document> knownDocuments = new ArrayList<Document>();
+     List<Document> unknownDocuments = new ArrayList<Document>();
+     for (Document document : documents) {
+         if (document.isAuthorKnown()) {
+             knownDocuments.add(document);
+         } else {
+             unknownDocuments.add(document);
+         }
+     }
+     for (AnalysisDriver analysisDriver : analysisDrivers) {
+         // The ValidationDriver and LeaveOneOutNoDistanceDriver checks stay separate because
+         // only the latter has the "Weighted Voting" special case.
+         if(analysisDriver instanceof ValidationDriver) {
+             leaveOneOutAnalyze(analysisDriver, knownDocuments);
+         } else if(analysisDriver instanceof LeaveOneOutNoDistanceDriver) {
+             if(analysisDriver.displayName().contains("Weighted Voting")) {
+                 analysisDriver.train(knownDocuments);
+                 for(Document knownDocument : knownDocuments) { //Weighted voting is NOT training on test here
+                     //when the user trains weighted voting, all it does is assign the weights to the classifiers by implementing LOOCV
+                     // Upon calling analyze, it will train its classifiers for analysis
+                     analysisDriver.analyze(knownDocument);
+                 }
+             } else {
+                 leaveOneOutAnalyze(analysisDriver, knownDocuments);
+             }
+         } else if (analysisDriver instanceof WEKAAnalysisDriver){
+             logger.info("Training " + analysisDriver.displayName());
+             analysisDriver.train(knownDocuments);
+             logger.info("Finished Training "+analysisDriver.displayName());
+             for (Document unknownDocument : unknownDocuments){
+                 logger.info("Begining Analyzing: " + unknownDocument.toString());
+                 unknownDocument.addResult(analysisDriver, analysisDriver.analyze(unknownDocument));
+                 logger.info("Finished Analyzing: "+unknownDocument.toString());
+             }
+         } else {
+             logger.info("Training " + analysisDriver.displayName());
+             analysisDriver.train(knownDocuments);
+             logger.info("Finished Training "+analysisDriver.displayName());
+             List<Future<Document>> futureDocuments = new ArrayList<Future<Document>>(unknownDocuments.size());
+             for (Document unknownDocument : unknownDocuments) {
+                 futureDocuments.add(executor.submit(new AnalysisWorker(unknownDocument, analysisDriver)));
+             }
+             awaitAnalysis(futureDocuments);
+         }
+         logger.info("Finished Analysis with "+analysisDriver.displayName());
+     }
+ }
+
+ /**
+  * Leave-one-out run: for every known document the driver is trained on all the other
+  * known documents, then the held-out document is analyzed on the executor.
+  * Each analysis is awaited before the driver is trained again for the next document.
+  */
+ private void leaveOneOutAnalyze(AnalysisDriver analysisDriver, List<Document> knownDocuments) throws AnalyzeException {
+     for (Document heldOut : knownDocuments) {
+         // Build a fresh training list; removing from knownDocuments itself (or an alias of it)
+         // would modify the list this loop is iterating over.
+         List<Document> trainingDocuments = new ArrayList<Document>(knownDocuments.size());
+         for (Document candidate : knownDocuments) {
+             if (!candidate.equals(heldOut)) {
+                 trainingDocuments.add(candidate);
+             }
+         }
+         logger.info("Training " + analysisDriver.displayName());
+         analysisDriver.train(trainingDocuments);
+         logger.info("Finished Training "+analysisDriver.displayName());
+         List<Future<Document>> pending = new ArrayList<Future<Document>>(1);
+         pending.add(executor.submit(new AnalysisWorker(heldOut, analysisDriver)));
+         awaitAnalysis(pending);
+     }
+ }
+
+ /**
+  * Blocks until every submitted analysis task has finished. A task that ended with an
+  * exception is logged and waiting continues with the next one. An interrupt does not cut
+  * the wait short; the interrupt status is restored once all tasks are done.
+  */
+ private void awaitAnalysis(List<Future<Document>> futureDocuments) {
+     boolean interrupted = false;
+     for (Future<Document> futureDocument : futureDocuments) {
+         boolean finished = false;
+         while (!finished) {
+             try {
+                 futureDocument.get();
+                 finished = true;
+             } catch (InterruptedException e) {
+                 // Remember the interrupt and keep waiting for this task.
+                 interrupted = true;
+             } catch (ExecutionException e) {
+                 logger.error("Analysis task failed", e.getCause());
+                 finished = true;
+             }
+         }
+     }
+     if (interrupted) {
+         Thread.currentThread().interrupt();
+     }
+ }
+
+ /**
+  * Performs the canonicize eventify cull and analyze methods since a strict order has to be enforced when using them.
+  * Data generated by a previous run is cleared first, and a new fixed-size thread pool
+  * (one thread per available processor) is created for this run.
+  *
+  * @throws Exception - if loading / canonicizing / eventifying, culling or analysis fails
+  */
+ public void execute() throws Exception {
+     clearData();
+     executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
+     try {
+         loadCanonicizeEventify();
+         cull();
+         analyze();
+     } finally {
+         // Shut the pool down even when a stage throws; otherwise its worker
+         // threads are left behind after a failed experiment.
+         executor.shutdown();
+     }
+     executor.awaitTermination(5, TimeUnit.SECONDS);
+ }
+
+ /**
+ * Removes canonicizers from all documents
+ */
+ public void clearCanonicizers() {
+ for(Document document : documents){
+ document.clearCanonicizers();
+ }
+ }
+
+ /**
+ * Removes all Generated data from a run but leaves all settings untouched
+ */
+ public void clearData() {
+ for(Document document : documents){
+ document.clearEventSets();
+ document.clearResults();
+ }
+ }
+
+ /**
+  * Task that applies every EventCuller of one EventDriver, in order, to that driver's
+  * EventSets across all documents, and then stores the culled EventSets back on the documents.
+  * Each culler is initialised with the full list of current EventSets before the individual
+  * sets are culled in parallel on a pool private to this task.
+  */
+ private class Culling implements Callable<EventDriver> {
+     private final EventDriver eventDriver;
+
+     Culling(EventDriver eventDriver) {
+         this.eventDriver = eventDriver;
+     }
+
+     /**
+      * @return the EventDriver whose EventSets were culled
+      * @throws Exception - if a culler or one of the culling workers fails
+      */
+     @Override
+     public EventDriver call() throws Exception {
+         List<EventSet> eventSets = new ArrayList<EventSet>(documents.size());
+         for(Document document : documents){
+             eventSets.add(document.getEventSet(eventDriver));
+         }
+         ExecutorService cullingExecutor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
+         try {
+             for(EventCuller culler : eventDriver.getEventCullers()) {
+                 culler.init(eventSets);
+                 List<Future<EventSet>> futureEventSets = new ArrayList<Future<EventSet>>(eventSets.size());
+                 for(EventSet eventSet : eventSets) {
+                     futureEventSets.add(cullingExecutor.submit(new CullerWorker(eventSet, culler)));
+                 }
+                 // Results are collected in submission order, so index i still
+                 // corresponds to documents.get(i) afterwards.
+                 eventSets.clear();
+                 for(Future<EventSet> futureEventSet : futureEventSets) {
+                     eventSets.add(futureEventSet.get());
+                 }
+             }
+         } finally {
+             // Release the private pool even when a culler fails.
+             cullingExecutor.shutdown();
+         }
+         for(int i = 0; i < documents.size(); i++) {
+             documents.get(i).addEventSet(eventDriver, eventSets.get(i));
+         }
+         return eventDriver;
+     }
+ }
+
+ /**
+  * Task that runs a single EventCuller over a single EventSet and returns
+  * whatever the culler's cull method produces.
+  */
+ private class CullerWorker implements Callable<EventSet> {
+     private final EventSet eventSet;
+     private final EventCuller culler;
+
+     CullerWorker(EventSet eventSet, EventCuller culler) {
+         this.eventSet = eventSet;
+         this.culler = culler;
+     }
+
+     @Override
+     public EventSet call() {
+         return culler.cull(eventSet);
+     }
+ }
+
+ /**
+  * Task that analyzes one document with one AnalysisDriver and stores the
+  * outcome on the document under that driver.
+  */
+ private class AnalysisWorker implements Callable<Document> {
+     private final Document document;
+     private final AnalysisDriver analysisDriver;
+
+     AnalysisWorker(Document document, AnalysisDriver analysisDriver){
+         this.document = document;
+         this.analysisDriver = analysisDriver;
+     }
+
+     /**
+      * @return the document that was analyzed, with the new result added
+      * @throws Exception - if the driver's analyze call fails
+      */
+     @Override
+     public Document call() throws Exception {
+         logger.info("Begining Analyzing: " + document.toString());
+         document.addResult(analysisDriver, analysisDriver.analyze(document));
+         logger.info("Finished Analyzing: "+document.toString());
+         return document;
+     }
+ }
+}
diff --git a/src/com/jgaap/classifiers/BurrowsDelta.java b/src/com/jgaap/classifiers/BurrowsDelta.java
index 0df29c9a4..7b5f3d63d 100644
--- a/src/com/jgaap/classifiers/BurrowsDelta.java
+++ b/src/com/jgaap/classifiers/BurrowsDelta.java
@@ -115,4 +115,4 @@ public List> analyze(Document unknown) {
Collections.sort(results);
return results;
}
-}
+}
\ No newline at end of file
diff --git a/src/com/jgaap/classifiers/KNearestNeighborDriver.java b/src/com/jgaap/classifiers/KNearestNeighborDriver.java
index 85849da32..35376993d 100644
--- a/src/com/jgaap/classifiers/KNearestNeighborDriver.java
+++ b/src/com/jgaap/classifiers/KNearestNeighborDriver.java
@@ -49,9 +49,12 @@ public class KNearestNeighborDriver extends NeighborAnalysisDriver {
private static final int DEFAULT_K = 5;
private static final String DEFAULT_TIE = "lastPicked";
-
+
public KNearestNeighborDriver() {
- addParams("k", "K", "5", new String[] {"1","2","3","4","5","6","7","8","9","10"}, false);
+ addParams("k", "K: Number of Neighbors", "5", new String[] { "1", "2",
+ "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
+ "14", "15", "16", "17", "18", "19", "20", "21", "22", "23",
+ "24", "25" }, false);
}
public String displayName() {
@@ -107,7 +110,7 @@ public List> analyze(Document unknown) throws AnalyzeExcept
}
List> results = ballot.getResults();
- Comparator> compareByScore = (Pair r1, Pair r2) -> r2.getSecond().compareTo(r1.getSecond());
+ Comparator> compareByScore = (Pair r1, Pair r2) -> r1.getSecond().compareTo(r2.getSecond());
Collections.sort(results, compareByScore);
return results;
diff --git a/src/com/jgaap/classifiers/LeaveOneOutKNearestNeighborDriver.java b/src/com/jgaap/classifiers/LeaveOneOutKNearestNeighborDriver.java
new file mode 100644
index 000000000..c87b42c7c
--- /dev/null
+++ b/src/com/jgaap/classifiers/LeaveOneOutKNearestNeighborDriver.java
@@ -0,0 +1,144 @@
+/*
+* JGAAP -- a graphical program for stylometric authorship attribution
+ * Copyright (C) 2009,2011 by Patrick Juola
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package com.jgaap.classifiers;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+
+import com.google.common.collect.ImmutableList;
+import com.jgaap.generics.AnalyzeException;
+import com.jgaap.generics.DistanceCalculationException;
+import com.jgaap.generics.ValidationDriver;
+import com.jgaap.util.Ballot;
+import com.jgaap.util.Document;
+import com.jgaap.util.EventMap;
+import com.jgaap.util.Pair;
+
+/**
+ * Leave-one-out k-nearest-neighbor driver: the k nearest known documents vote for their
+ * authors; a known document equal to the analyzed one never takes part in the vote.
+ * KNN LOOCV implementation by Alejandro Jorge Napolitano Jawerbaum.
+ */
+public class LeaveOneOutKNearestNeighborDriver extends ValidationDriver {
+    private java.util.logging.Logger logger = java.util.logging.Logger.getLogger(LeaveOneOutKNearestNeighborDriver.class.getName());
+    /** Training documents paired with their event maps; filled in by train(). */
+    private ImmutableList<Pair<Document, EventMap>> knowns;
+    private static final int DEFAULT_K = 5;
+    private static final String DEFAULT_TIE = "lastPicked";
+
+    public LeaveOneOutKNearestNeighborDriver() {
+        addParams("k", "K: Number of Neighbors", "5", new String[] { "1", "2",
+                "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
+                "14", "15", "16", "17", "18", "19", "20", "21", "22", "23",
+                "24", "25" }, false);
+    }
+    @Override
+    public String displayName() {
+        return "Leave One Out K-Nearest Neighbor driver" + this.getDistanceName();
+    }
+    @Override
+    public String tooltipText() {
+        return " ";
+    }
+    @Override
+    public boolean showInGUI() {
+        return true;
+    }
+    /** Caches every known document together with its event map. */
+    @Override
+    public void train(List<Document> knowns){
+        ImmutableList.Builder<Pair<Document, EventMap>> builder = ImmutableList.builder();
+        for(Document known : knowns) {
+            builder.add(new Pair<Document, EventMap>(known, new EventMap(known)));
+        }
+        this.knowns = builder.build();
+    }
+    /**
+     * Ranks candidate authors for a document by a weighted vote of its k nearest known documents.
+     * @param unknown document to attribute; a known document equal to it is left out
+     * @return author/score pairs sorted by descending score
+     * @throws AnalyzeException if the distance function fails
+     */
+    @Override
+    public List<Pair<String, Double>> analyze(Document unknown) throws AnalyzeException {
+        Ballot<String> ballot = new Ballot<String>();
+        int k = getParameter("k", DEFAULT_K);
+        String tieBreaker = getParameter("tieBreaker", DEFAULT_TIE);
+        List<Pair<String, Double>> rawResults = new ArrayList<Pair<String, Double>>();
+        // The unknown's event map is the same for every comparison, so build it once, not per known.
+        EventMap unknownMap = new EventMap(unknown);
+        for (int i = 0; i < knowns.size(); i++) {
+            if(!knowns.get(i).getFirst().equals(unknown)) {
+                double current;
+                try {
+                    current = distance.distance(unknownMap, knowns.get(i).getSecond());
+                } catch (DistanceCalculationException e) {
+                    throw new AnalyzeException("Distance "+distance.displayName()+" failed");
+                }
+                rawResults.add(new Pair<String, Double>(knowns.get(i).getFirst().getAuthor(), current, 2));
+            }
+            else
+                logger.info("Excluded document that's being tested.");
+        }
+        Collections.sort(rawResults);
+        // The entry at sorted position i votes with weight 1 + 2^-(i+1): earlier entries weigh more.
+        for(int i = 0; i < Math.min(k, rawResults.size()); i++) {
+            Pair<String, Double> p = rawResults.get(i);
+            ballot.vote(p.getFirst(), (1 + Math.pow(2, (-1.0 * (i+1)))));
+        }
+        if(tieBreaker.equals("lastPicked")) {
+            ballot.setComparator(new LastPickedComparator());
+        }
+        List<Pair<String, Double>> results = ballot.getResults();
+        Comparator<Pair<String, Double>> compareByScore = (Pair<String, Double> r1, Pair<String, Double> r2) -> r2.getSecond().compareTo(r1.getSecond());
+        Collections.sort(results, compareByScore);
+        return results;
+    }
+    /** Comparator handed to the ballot when the "tieBreaker" parameter is "lastPicked". */
+    private static class LastPickedComparator implements Comparator<Pair<String, Double>>, Serializable {
+        private static final long serialVersionUID = 1L;
+        public int compare(Pair<String, Double> firstPair, Pair<String, Double> secondPair) {
+            double first = firstPair.getSecond();
+            double second = secondPair.getSecond();
+            // If the overall rank was not the same, then return these according to rank.
+            if((int)first != (int)second) {
+                return (int)first - (int)second;
+            }
+            // Otherwise, we want to move the decimal point right until we have an integer.
+            // NOTE(review): (int)first - first is never positive for a non-negative score, so this
+            // loop and the test after it look unreachable as written -- confirm the intended sign.
+            while(((int)first - first) > 0.0000001) {
+                first *= 2;
+                second *= 2;
+            }
+            // If first had fewer decimal places than second, this means the last first vote came BEFORE the last second vote.
+            if(((int)second -second) > 0.0000001) {
+                return 1;
+            }
+            // Otherwise, the last second vote came before the last first vote.
+            else {
+                return -1;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/com/jgaap/classifiers/LeaveOneOutNoDistanceDriver.java b/src/com/jgaap/classifiers/LeaveOneOutNoDistanceDriver.java
index f64d8c0c7..5579467fd 100644
--- a/src/com/jgaap/classifiers/LeaveOneOutNoDistanceDriver.java
+++ b/src/com/jgaap/classifiers/LeaveOneOutNoDistanceDriver.java
@@ -62,12 +62,14 @@ public List> analyze(Document fakeUnknown) throws AnalyzeEx
// document. We call this document a fake unknown because it is actually known,
// but we want to pretend that it isn't.
List knownsTemp = new ArrayList<>();
- for(Document known : knownDocuments)
- if(known != fakeUnknown)
+ for(Document known : knownDocuments) {
+ if(!known.equals(fakeUnknown))
knownsTemp.add(known);
-
+ }
+ // Set the analysisDriver's parameters.
// Pass the temporary known list and the fake unknown to the analysis driver that this
// driver depends on, and return the result.
+ analysisDriver.setParamGUI(getParamGUI());
analysisDriver.train(knownsTemp);
return analysisDriver.analyze(fakeUnknown);
}
diff --git a/src/com/jgaap/classifiers/WEKALogisticRegression.java b/src/com/jgaap/classifiers/WEKALogisticRegression.java
new file mode 100644
index 000000000..d5a7a2daa
--- /dev/null
+++ b/src/com/jgaap/classifiers/WEKALogisticRegression.java
@@ -0,0 +1,35 @@
+package com.jgaap.classifiers;
+
+import java.util.List;
+
+import com.jgaap.generics.AnalyzeException;
+import com.jgaap.generics.WEKAAnalysisDriver;
+import com.jgaap.util.Document;
+
+import weka.classifiers.Classifier;
+
+/** Analysis driver built on WEKAAnalysisDriver that supplies WEKA's Logistic classifier. */
+public class WEKALogisticRegression extends WEKAAnalysisDriver {
+    /** Name reported for this driver. */
+    @Override
+    public String displayName() {
+        return "WEKA Logistic Regression";
+    }
+    /** One-line description of this driver. */
+    @Override
+    public String tooltipText() {
+        return "Multinomial logistic regression, Courtesy of WEKA";
+    }
+    /** Always true. */
+    @Override
+    public boolean showInGUI() {
+        return true;
+    }
+    /** Creates a new WEKA Logistic instance on every call. */
+    public Classifier getClassifier() {
+        return new weka.classifiers.functions.Logistic();
+    }
+    /** Imposes no requirements: every list of known documents is accepted and nothing is thrown. */
+    public void testRequirements(List knownList) throws AnalyzeException{
+    }
+}
diff --git a/src/com/jgaap/classifiers/weightedVoting.java b/src/com/jgaap/classifiers/weightedVoting.java
new file mode 100644
index 000000000..c3ee0f828
--- /dev/null
+++ b/src/com/jgaap/classifiers/weightedVoting.java
@@ -0,0 +1,142 @@
+package com.jgaap.classifiers;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import org.apache.log4j.Logger;
+import com.jgaap.backend.AnalysisDrivers;
+import com.jgaap.backend.DistanceFunctions;
+import com.jgaap.generics.AnalysisDriver;
+import com.jgaap.generics.AnalyzeException;
+import com.jgaap.generics.DistanceFunction;
+import com.jgaap.generics.NeighborAnalysisDriver;
+import com.jgaap.generics.ValidationDriver;
+import com.jgaap.util.Document;
+import com.jgaap.util.Pair;
+import com.jgaap.util.WeightingMethod;
+/**
+ * Lets several analysis drivers vote on a document's author; each vote counts as much as the
+ * weight WeightingMethod gives its driver ("none" weighs all alike). Drivers are held in sets
+ * so that one driver object cannot vote twice. See tooltipText for a short description.
+ * @author Alejandro J Napolitano Jawerbaum
+ */
+public class weightedVoting extends AnalysisDriver {
+    public Set<AnalysisDriver> classifiers = new HashSet<AnalysisDriver>();
+    private Set<Pair<AnalysisDriver, Double>> weightedClassifiers = new HashSet<Pair<AnalysisDriver, Double>>(); // voters kept by the last train(); per instance so drivers cannot overwrite each other
+    private Set<Pair<AnalysisDriver, Double>> weights = new HashSet<Pair<AnalysisDriver, Double>>();
+    private Set<String> authors = new HashSet<String>();
+    private static Logger logger = Logger.getLogger(weightedVoting.class);
+    private List<Document> knowns = new ArrayList<Document>();
+    public weightedVoting() {
+        addParams("Classifiers", "Classifiers to be put to a vote.","Comma-separated list. Add | before parameters.", new String[] {""}, true); //TODO: Get all classifiers and add them to the array, then call each of them
+        addParams("Distances", "Distance metrics for distance dependent Analysis Drivers","Comma-separated list", new String[] {""}, true);
+        addParams("WeightingMethod", "Way to weight the classifiers.", "cross-validation", new String[]{"cross-validation", "accuracyOverSum", "none"}, false);
+        addParams("Cutoff", "Minimum cross-validation score to consider an algorithm's vote.", "75", new String[]{"0", "10", "20","30","40","45","50","55","60","65","70","75","80","85","90","95", "100"}, true);
+        addParams("VotingMethod", "Voting Method.", "sum", new String[] {"sum", "sum/count"}, false); // NOTE(review): nothing in this class reads "VotingMethod" yet
+        addParams("AuthorsForCrossval", "Comma separated list of Authors to cross-validate. Empty = All.", "", new String[] {}, true);
+    }
+    @Override
+    public String displayName() {
+        return "Weighted Voting";
+    }
+    @Override
+    public String tooltipText() {
+        return "Takes in a list of analysis drivers, and put them to a vote on each unknown document. Warning: We recommend including independent classifiers only.";
+    }
+    @Override
+    public boolean showInGUI() {
+        return true;
+    }
+    /**
+     * Builds the voters named by "Classifiers", weighs them, keeps those whose weight reaches
+     * Cutoff/100 and trains the kept ones on all known documents, so that analyze() only votes.
+     */
+    @Override
+    public void train(List<Document> knownDocuments) throws AnalyzeException {
+        Set<String> knownAuthors = new HashSet<String>();
+        for(Document doc : knownDocuments)
+            knownAuthors.add(doc.getAuthor());
+        authors = knownAuthors; // replaced, not extended: authors of earlier runs must not linger
+        knowns = knownDocuments;
+        Set<AnalysisDriver> clsfr = new HashSet<AnalysisDriver>();
+        for(String s : getParameter("Classifiers").split(",")) {
+            String name = s.trim();
+            try {
+                AnalysisDriver classifier = AnalysisDrivers.getAnalysisDriver(name);
+                if(classifier instanceof NeighborAnalysisDriver) {
+                    for(String distance : getParameter("Distances").split(",")) {
+                        // One driver per distance; a shared object would keep only the last distance. Assumes each lookup returns a new object -- confirm.
+                        NeighborAnalysisDriver classif = (NeighborAnalysisDriver)AnalysisDrivers.getAnalysisDriver(name);
+                        DistanceFunction dist = DistanceFunctions.getDistanceFunction(distance.trim());
+                        classif.setDistance(dist);
+                        clsfr.add(classif);
+                    }
+                }
+                else if(!(classifier instanceof LeaveOneOutNoDistanceDriver) && !(classifier instanceof ValidationDriver) && !(classifier instanceof weightedVoting))
+                    clsfr.add(classifier);
+                else
+                    logger.info("Excluded cross-validation driver. Or worse, a weighted voting inception.");
+            } catch (Exception e) {
+                logger.error("Could not set up classifier '" + name + "'; it will not vote.", e);
+            }
+        }
+        classifiers = clsfr;
+        weights = WeightingMethod.weight(classifiers, knownDocuments, getParameter("WeightingMethod"), getParameter("AuthorsForCrossval"));
+        double cutoff = Double.parseDouble(getParameter("Cutoff"))/100; // a cutoff of 0 keeps every weighted classifier (it used to leave the voter set unassigned)
+        Set<Pair<AnalysisDriver, Double>> weighted = new HashSet<Pair<AnalysisDriver, Double>>();
+        for(Pair<AnalysisDriver, Double> weight : weights)
+            if(weight.getSecond()>=cutoff)
+                weighted.add(weight);
+        weightedClassifiers = weighted;
+        // Weighting may have trained the voters on partial data, so train them on everything, once, here.
+        for(Pair<AnalysisDriver, Double> weightedClassifier : weightedClassifiers) {
+            logger.info("Training " + weightedClassifier.getFirst().displayName() + " for analysis");
+            weightedClassifier.getFirst().train(knowns);
+            logger.info("Finished training " + weightedClassifier.getFirst().displayName() + " for analysis");
+        }
+    }
+    /**
+     * Analyzes the unknown document and tallies the weighted votes.
+     * @param unknownDocument the document to be analyzed
+     * @return every known author mapped to the summed weight of the votes cast for them
+     */
+    public Map<String, Double> vote(Document unknownDocument) throws AnalyzeException {
+        List<Pair<String, Double>> authorVote = new ArrayList<Pair<String, Double>>();
+        for(Pair<AnalysisDriver, Double> weightedClassifier : weightedClassifiers) {
+            List<Pair<String, Double>> results = weightedClassifier.getFirst().analyze(unknownDocument);
+            if(results == null || results.isEmpty()) {
+                logger.warn(weightedClassifier.getFirst().displayName() + " returned no result for document " + unknownDocument.getTitle() + "; its vote is skipped.");
+                continue;
+            }
+            logger.info(weightedClassifier.getFirst().displayName()+ ". weight = " + weightedClassifier.getSecond() + ". Voted for " + results.get(0).getFirst() + " for document " + unknownDocument.getTitle());
+            authorVote.add(new Pair<String, Double>(results.get(0).getFirst(), weightedClassifier.getSecond()));
+        }
+        //We should check the results for ties, and let the score be 0 for all authors if that is the case.
+        Map<String, Double> authorVoteSumMap = new HashMap<String, Double>();
+        for (String author : authors) {
+            double totalVote = 0.0;
+            for (Pair<String, Double> vote : authorVote)
+                if (vote.getFirst().contains(author)) // NOTE(review): substring match kept as is; confirm whether result labels can carry more than the author name
+                    totalVote += vote.getSecond();
+            authorVoteSumMap.put(author, totalVote);
+        }
+        logger.info(authorVoteSumMap);
+        return authorVoteSumMap;
+    }
+    /** Returns every known author paired with the total from vote(), highest total first. */
+    @Override
+    public List<Pair<String, Double>> analyze(Document unknownDocument) throws AnalyzeException {
+        Map<String, Double> authorVoteSumMap = vote(unknownDocument);
+        List<Pair<String, Double>> authorVoteSum = new ArrayList<Pair<String, Double>>();
+        for(String author : authors)
+            authorVoteSum.add(new Pair<String, Double>(author,authorVoteSumMap.get(author)));
+        Comparator<Pair<String, Double>> compareByScore = (Pair<String, Double> r1, Pair<String, Double> r2) -> r2.getSecond().compareTo(r1.getSecond());
+        Collections.sort(authorVoteSum, compareByScore);
+        return authorVoteSum;
+    }
+}
diff --git a/src/com/jgaap/util/WeightingMethod.java b/src/com/jgaap/util/WeightingMethod.java
new file mode 100644
index 000000000..981d0c4db
--- /dev/null
+++ b/src/com/jgaap/util/WeightingMethod.java
@@ -0,0 +1,95 @@
+package com.jgaap.util;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.log4j.Logger;
+
+import com.jgaap.generics.AnalysisDriver;
+import com.jgaap.generics.AnalyzeException;
+/**
+ * Weighting support for weightedVoting and other algorithms that put AnalysisDrivers to a vote.
+ * @author Alejandro J Napolitano Jawerbaum
+ */
+public class WeightingMethod {
+    private static Logger logger = Logger.getLogger(WeightingMethod.class);
+    /**
+     * Passes the request on to the weighting method named by {@code method}. Authors can be
+     * restricted because, with suspected and distractor authors, a user may only care how well a
+     * classifier tells the suspected authors from each other and from the distractors.
+     * @param classifiers the AnalysisDrivers to weigh
+     * @param knownDocuments documents used for cross-validation
+     * @param method "cross-validation" or "accuracyOverSum", ignoring case; any other value gives every classifier the weight 1.0
+     * @param authors comma-separated authors to cross-validate for; empty means all authors
+     * @return classifier/weight pairs, held in a set so as to prevent duplicate AnalysisDrivers
+     */
+    public static Set<Pair<AnalysisDriver, Double>> weight(Set<AnalysisDriver> classifiers, List<Document> knownDocuments, String method, String authors) throws AnalyzeException{
+        if(method.equalsIgnoreCase("cross-validation"))
+            return weightByCrossVal(classifiers, knownDocuments, authors);
+        else if(method.equalsIgnoreCase("accuracyOverSum"))
+            return weightByAccuracyOverSum(classifiers, knownDocuments, authors);
+        Set<Pair<AnalysisDriver, Double>> unweightedClassifiers = new HashSet<Pair<AnalysisDriver, Double>>();
+        for(AnalysisDriver classifier : classifiers)
+            unweightedClassifiers.add(new Pair<AnalysisDriver, Double>(classifier, 1.0));
+        return unweightedClassifiers;
+    }
+    /**
+     * Weighs every classifier by its raw leave-one-out cross-validation score (hits / documents tested).
+     * @param classifiers the AnalysisDrivers to weigh; each one is retrained once per tested document
+     * @param knownDocuments documents used for cross-validation
+     * @param authors comma-separated authors whose documents are tested; empty means all authors
+     * @return classifier/score pairs; the score is 0.0 when no document was tested
+     */
+    public static Set<Pair<AnalysisDriver, Double>> weightByCrossVal(Set<AnalysisDriver> classifiers, List<Document> knownDocuments, String authors) throws AnalyzeException{//This will be expanded, but for now it weights by LOOCV score.
+        // Match whole author names: a substring test on the raw list would let "Ann" select "Joanne".
+        Set<String> wantedAuthors = new HashSet<String>();
+        for(String author : authors.split(","))
+            if(!author.trim().isEmpty())
+                wantedAuthors.add(author.trim());
+        Set<Pair<AnalysisDriver, Double>> weights = new HashSet<Pair<AnalysisDriver, Double>>();
+        for(AnalysisDriver classifier : classifiers) {
+            double analysesCounter = 0.0;
+            double score = 0.0;
+            for (Document knownDocument : knownDocuments) {
+                if(!wantedAuthors.isEmpty() && !wantedAuthors.contains(knownDocument.getAuthor()))
+                    continue;
+                List<Document> knownDocuments2 = new ArrayList<Document>();
+                for(Document knownDocument2 : knownDocuments){
+                    if(!knownDocument2.equals(knownDocument))
+                        knownDocuments2.add(knownDocument2);
+                }
+                logger.info("Training " + classifier.displayName() +" for cross-validation");
+                classifier.train(knownDocuments2);
+                logger.info("Finished Training "+classifier.displayName() + " for cross-validation");
+                logger.info("Begining Analyzing: " + knownDocument.toString() + " for cross-validation");
+                List<Pair<String, Double>> results = classifier.analyze(knownDocument);
+                logger.info("Finished Analyzing: "+ knownDocument.toString() + " for cross-validation");
+                analysesCounter++;
+                // An empty ranking counts as a miss instead of failing on results.get(0).
+                if(!results.isEmpty() && results.get(0).getFirst().contains(knownDocument.getAuthor()))
+                    score++;
+            }
+            // Nothing tested (no document matched the author filter): weight 0.0, not 0.0/0.0 = NaN.
+            weights.add(new Pair<AnalysisDriver, Double>(classifier, analysesCounter == 0.0 ? 0.0 : score/analysesCounter));
+        }
+        return weights;
+    }
+    /**
+     * Weighs every classifier by its raw LOOCV score divided by the sum of all classifiers' scores.
+     * Parameters are the same as for weightByCrossVal.
+     * @return classifier/weight pairs; every weight is 0.0 when all scores are 0.0
+     */
+    public static Set<Pair<AnalysisDriver, Double>> weightByAccuracyOverSum(Set<AnalysisDriver> classifiers, List<Document> knownDocuments, String authors) throws AnalyzeException{
+        Set<Pair<AnalysisDriver, Double>> weights = weightByCrossVal(classifiers, knownDocuments, authors);
+        Set<Pair<AnalysisDriver, Double>> weights2 = new HashSet<Pair<AnalysisDriver, Double>>();
+        double sum = 0.0;
+        for(Pair<AnalysisDriver, Double> weight : weights)
+            sum+=weight.getSecond();
+        // A zero sum means every score is 0.0; keep those zeros rather than dividing 0.0 by 0.0.
+        for(Pair<AnalysisDriver, Double> weight : weights)
+            weights2.add(new Pair<AnalysisDriver, Double>(weight.getFirst(), sum == 0.0 ? 0.0 : weight.getSecond()/sum));
+        return weights2;
+    }
+}