Skip to content

Commit

Permalink
extensions bachelor schaefer 1
Browse files Browse the repository at this point in the history
  • Loading branch information
ChristineSchaefer committed Apr 7, 2021
1 parent 3a393eb commit a954f94
Show file tree
Hide file tree
Showing 12 changed files with 1,127 additions and 79 deletions.
164 changes: 164 additions & 0 deletions .factorypath

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions .project
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,15 @@
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
<filteredResources>
<filter>
<id>1617791422369</id>
<name></name>
<type>30</type>
<matcher>
<id>org.eclipse.core.resources.regexFilterMatcher</id>
<arguments>node_modules|.git|__CREATED_BY_JAVA_LANGUAGE_SERVER__</arguments>
</matcher>
</filter>
</filteredResources>
</projectDescription>
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"java.configuration.updateBuildConfiguration": "interactive"
}
7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -241,5 +241,12 @@
</dependency>


<!-- other -->
<dependency>
<groupId>net.sf.supercsv</groupId>
<artifactId>super-csv</artifactId>
<version>2.4.0</version>
</dependency>

</dependencies>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ public class InformationEntity {
@Getter(AccessLevel.NONE)
@Setter(AccessLevel.NONE)
private List<TextToken> originalEntity;

// Confidence value of this entity
private Double conf;


/**
Expand Down Expand Up @@ -132,5 +135,39 @@ public String toString(){
return sb.toString().trim();
}

/**
 * Returns the confidence currently stored for this entity.
 *
 * @author ChristineSchaefer
 *
 * @return the value of the {@code conf} field as last assigned by
 *         {@code setConf}; may be {@code null} if nothing has assigned it yet
 */
public Double getConf() {
    return this.conf;
}

/**
 * Computes and stores the confidence of this entity from the patterns that
 * extracted it: {@code conf = 1 - product of (1 - p.getConf())} over all
 * given patterns.
 *
 * <p>An empty list yields a confidence of {@code 0.0}, because the empty
 * product is 1. A pattern whose confidence is {@code null} causes a
 * {@link NullPointerException} when its value is unboxed.
 *
 * @author ChristineSchaefer
 *
 * @param usedPattern the patterns that produced this entity
 * @return the newly stored confidence
 */
public Double setConf(List<Pattern> usedPattern) {
    // Start from the multiplicative identity. Using 0 as a "not yet set"
    // marker would confuse a genuine zero factor (pattern confidence 1.0)
    // with the initial state and silently drop it from the product.
    double product = 1d;
    for (Pattern p : usedPattern) {
        product *= 1 - p.getConf();
    }
    this.conf = 1 - product;
    return conf;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -11,124 +11,146 @@
/**
* @author geduldia
*
* Represents an Extraction-Pattern to identify Information (e.g. competences or tools) in JobAds.
* Consist of several PatternTokens and a Pointer to the Token(s) which has to be extracted in case of a match.
* Represents an Extraction-Pattern to identify Information (e.g.
* competences or tools) in JobAds. Consist of several PatternTokens and
* a Pointer to the Token(s) which has to be extracted in case of a
* match.
*
*/
@Data
public class Pattern {

@Setter(AccessLevel.NONE)
private List<PatternToken> tokens = new ArrayList<PatternToken>();
private List<Integer> extractionPointer = new ArrayList<Integer>();
private String description;
private int id;

private Double conf;

/**
* adds a new token to this Pattern
*
* @param token toAdd
*/
public void addToken(PatternToken token){
public void addToken(PatternToken token) {
tokens.add(token);
}

/**
* @return number of tokens in this Pattern
*/
public int getSize(){
public int getSize() {
return tokens.size();
}

/**
* returns the Token at the given index
*
* @param index
* @return token at index
*/
public Token getTokenAt(int index){
public Token getTokenAt(int index) {
return tokens.get(index);
}


/* (non-Javadoc)
/**
 * Computes and stores the confidence of this pattern as the share of true
 * positives among all counted matches: {@code tp / (tp + fp)}.
 *
 * <p>If both counts are zero the confidence is set to {@code 0.0} instead of
 * {@code NaN}, so that a pattern without any counted match does not turn
 * every value derived from its confidence into {@code NaN}.
 *
 * @author ChristineSchaefer
 *
 * @param tp number of true-positive matches
 * @param fp number of false-positive matches
 * @return the newly stored confidence
 */
public Double setConf(int tp, int fp) {
    // Sum in long so that two large counts cannot overflow int.
    long total = (long) tp + fp;
    this.conf = (total == 0) ? 0d : (double) tp / total;
    return conf;
}

/**
 * Returns the confidence currently stored for this pattern.
 *
 * @author ChristineSchaefer
 *
 * @return the value of the {@code conf} field as last assigned by
 *         {@code setConf}; may be {@code null} if nothing has assigned it yet
 */
public Double getConf() {
    return this.conf;
}

/*
* (non-Javadoc)
*
* @see java.lang.Object#toString()
*/
@Override
public String toString(){
@Override
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append("ID:\t"+id+"\n");
sb.append("NAME:\t"+description+"\n");
sb.append("ID:\t" + id + "\n");
sb.append("NAME:\t" + description + "\n");
for (int t = 0; t < tokens.size(); t++) {
de.uni_koeln.spinfo.data.Token token = tokens.get(t);
sb.append("TOKEN:\t");
sb.append(token.getToken()+"\t");
sb.append(token.getLemma()+"\t");
sb.append(token.getPosTag()+"\t");
sb.append(token.isInformationEntity()+"\n");
}
sb.append(token.getToken() + "\t");
sb.append(token.getLemma() + "\t");
sb.append(token.getPosTag() + "\t");
sb.append(token.isInformationEntity() + "\n");
}
sb.append("EXTRACT:\t");
for (Integer i : extractionPointer) {
sb.append(i+",");
sb.append(i + ",");
}
sb.deleteCharAt(sb.length()-1);
sb.deleteCharAt(sb.length() - 1);
sb.append("\n");
sb.append("CONF:\t"+"0.0");
sb.append("CONF:\t" + conf + "\n\n");
return sb.toString();
}

// /**
// * @return list of all tokens in this Pattern
// */
// public List<PatternToken> getTokens(){
// return tokens;
// }

// /**
// * @return pattern-id
// */
// public int getId() {
// return id;
// }
//
// /**
// * @param id
// */
// public void setId(int id) {
// this.id = id;
// }


// /**
// * @param description
// */
// public void setDescription(String description){
// this.description = description;
// }
//
// /**
// * @return description
// */
// public String getDescription(){
// return description;
// }



// /**
// * returns the index/the indices which point to the Patterns to extract in case of a match
// * @return list of indices
// */
// public List<Integer> getPointer(){
// return extractionPointer;
// }
//
// /**
// *
// * @param extractionPointer
// */
// public void setPointer(List<Integer> extractionPointer){
// this.extractionPointer = extractionPointer;
// }



// /**
// * @return list of all tokens in this Pattern
// */
// public List<PatternToken> getTokens(){
// return tokens;
// }

// /**
// * @return pattern-id
// */
// public int getId() {
// return id;
// }
//
// /**
// * @param id
// */
// public void setId(int id) {
// this.id = id;
// }

/**
 * Replaces the description of this pattern.
 *
 * @param description the new description
 */
public void setDescription(String description) {
    this.description = description;
}

/**
 * Returns the description of this pattern.
 *
 * @return description
 */
public String getDescription() {
    return this.description;
}

// /**
// * returns the index/the indices which point to the Patterns to extract in
// case of a match
// * @return list of indices
// */
// public List<Integer> getPointer(){
// return extractionPointer;
// }
//
// /**
// *
// * @param extractionPointer
// */
// public void setPointer(List<Integer> extractionPointer){
// this.extractionPointer = extractionPointer;
// }

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package quenfo.de.uni_koeln.spinfo.information_extraction.db_io;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
Expand Down Expand Up @@ -508,4 +512,45 @@ public static void createIndex(Connection connection, String table, String colum
connection.commit();
}

/**
 * Reads the extractions (competence, no competence) from a text file, one
 * entry per line.
 *
 * @author Christine Schaefer
 *
 * @param file the text file to read
 * @return the lines of the file in reading order, without line terminators;
 *         an empty list if the file has no content
 * @throws IOException if the file cannot be opened or read
 */
public static List<String> readCompetences(File file) throws IOException {
    List<String> extractions = new ArrayList<String>();
    // try-with-resources closes the reader even when readLine() throws.
    // NOTE(review): FileReader decodes with the platform default charset on
    // Java versions before 18 - confirm the expected encoding of the input files.
    try (BufferedReader br = new BufferedReader(new FileReader(file))) {
        String line;
        while ((line = br.readLine()) != null) {
            extractions.add(line);
        }
    }
    return extractions;
}

/**
 * Collects the competences of all given extraction units into one list
 * without duplicates.
 *
 * @author Christine Schaefer
 *
 * @param entities competences per extraction unit
 * @return every distinct competence string, in order of first occurrence
 *         while iterating over the map; a new list on every call
 */
public static List<String> saveValidatedCompetences(Map<ExtractionUnit, List<String>> entities) {
    // A LinkedHashSet drops duplicates in constant time per element and keeps
    // first-occurrence order, replacing a linear List.contains scan per element.
    java.util.Set<String> unique = new java.util.LinkedHashSet<String>();
    for (List<String> competences : entities.values()) {
        unique.addAll(competences);
    }
    return new ArrayList<String>(unique);
}

}
Loading

0 comments on commit a954f94

Please sign in to comment.