22
33import java .io .File ;
44import java .io .IOException ;
5- import java .text .ParseException ;
6- import java .text .SimpleDateFormat ;
7- import java .util .Date ;
85import java .util .HashMap ;
96import java .util .TreeMap ;
107import java .util .Vector ;
11- import java .util .regex .Pattern ;
128
139import javax .xml .transform .TransformerException ;
1410
1915import gov .nara .nwts .ftapp .ActionResult ;
2016import gov .nara .nwts .ftapp .FTDriver ;
2117import gov .nara .nwts .ftapp .Timer ;
18+ import gov .nara .nwts .ftapp .ftprop .FTPropEnum ;
19+ import gov .nara .nwts .ftapp .ftprop .FTPropFile ;
2220import gov .nara .nwts .ftapp .ftprop .FTPropString ;
2321import gov .nara .nwts .ftapp .importer .DefaultImporter ;
2422import gov .nara .nwts .ftapp .importer .DelimitedFileReader ;
3735 */
3836public class EAD2DAO extends DefaultImporter {
3937
40- public static enum EAD2DCStatsItems implements StatsItemEnum {
41- Record (StatsItem .makeStringStatsItem ("Record" , 100 ).setExport (false ));
42-
// Property keys registered in the constructor; each doubles as the property's display name.
// Selects which EAD field (an EAD_MATCHER value) is matched against the CSV file.
38+ public static final String P_MATCHTYPE = "match-type" ;
// Optional CSV file containing the columns to match.
39+ public static final String P_DCCSV = "csv-file" ;
// Name of the CSV column to match.
40+ public static final String P_MATCH = "match-col" ;
// Name of the CSV column to use as a DAO name.
41+ public static final String P_NAME = "name-col" ;
// Name of the CSV column to assign as a DAO identifier.
42+ public static final String P_DAOID = "daoid-col" ;
// Name of the CSV column to assign as a DAO link.
43+ public static final String P_LINK = "link-col" ;
// Name of the CSV column to assign as a thumbnail url.
44+ public static final String P_THUMB = "thumb-col" ;
// File property registered under P_DCCSV; initMapFile() reads the lookup CSV from it.
45+ private FTPropFile dcFile ;
46+ public static enum EAD2DAOStatsItems implements StatsItemEnum {
47+ Record (StatsItem .makeStringStatsItem ("Record" , 100 ).setExport (false )),
48+ Field_Name (StatsItem .makeStringStatsItem ("Field Name" ).setInitVal ("TBD" )),
49+ EAD_ID (StatsItem .makeStringStatsItem ("EAD ID" , 100 )),
50+ REF_ID (StatsItem .makeStringStatsItem ("REF ID" , 150 )),
51+ DigitalObjectId (StatsItem .makeStringStatsItem ("Digital Object ID" , 150 )),
52+ DigitalObjectTitle (StatsItem .makeStringStatsItem ("Digital Object Title" , 150 )),
53+ PublishDAO (StatsItem .makeEnumStatsItem (TF .class , "Publish Digital Object Record" )),
54+ DAOLink (StatsItem .makeStringStatsItem ("File URL of Linked-to digital object" , 150 )),
55+ DAOThumbnail (StatsItem .makeStringStatsItem ("File URL of Thumbnail" , 150 ));
56+ ;
4357 StatsItem si ;
4458
45- EAD2DCStatsItems (StatsItem si ) {
59+ EAD2DAOStatsItems (StatsItem si ) {
4660 this .si = si ;
4761 }
4862
@@ -59,11 +73,41 @@ public Stats create(String key) {
5973 }
6074
6175 public static StatsItemConfig details = StatsItemConfig
62- .create (EAD2DCStatsItems .class );
76+ .create (EAD2DAOStatsItems .class );
77+
78+ public static enum TF {TRUE ,FALSE }
79+ public static enum EAD_MATCHER {
80+ TITLE (4 ),
81+ AS_REFID (2 );
82+ int index ;
83+ EAD_MATCHER (int index ) {
84+ this .index = index ;
85+ }
86+ }
6387
6488
6589 public EAD2DAO (FTDriver dt ) {
6690 super (dt );
91+ dcFile = new FTPropFile (this .dt , this .getClass ().getSimpleName (), P_DCCSV , P_DCCSV , "CSV file containing columns to match, Optional" , "" );
92+ this .ftprops .add (new FTPropEnum (dt , this .getClass ().getSimpleName (),
93+ P_MATCHTYPE , P_MATCHTYPE ,
94+ "Name of EAD field to match in CSV file" , EAD_MATCHER .values (), EAD_MATCHER .TITLE ));
95+ ftprops .add (dcFile );
96+ this .ftprops .add (new FTPropString (dt , this .getClass ().getSimpleName (),
97+ P_MATCH , P_MATCH ,
98+ "Name of column to match" ,"dc.title[en]" ));
99+ this .ftprops .add (new FTPropString (dt , this .getClass ().getSimpleName (),
100+ P_DAOID , P_DAOID ,
101+ "Name of column to assign as a DAO identifier" ,"dc.identifier.uri[en]" ));
102+ this .ftprops .add (new FTPropString (dt , this .getClass ().getSimpleName (),
103+ P_NAME , P_NAME ,
104+ "Name of column to use as a DAO name" ,"dc.title[en]" ));
105+ this .ftprops .add (new FTPropString (dt , this .getClass ().getSimpleName (),
106+ P_LINK , P_LINK ,
107+ "Name of column to assign as a DAO link" ,"dc.identifier.uri[en]" ));
108+ this .ftprops .add (new FTPropString (dt , this .getClass ().getSimpleName (),
109+ P_THUMB , P_THUMB ,
110+ "Name of column to assign as a thumbnail url" ,"thumbnail-link" ));
67111 }
68112
69113 public String toString () {
@@ -77,8 +121,77 @@ public String getShortName() {
77121 return "EAD2DAO" ;
78122 }
79123
// Lookup table filled by initMapFile(): normalized match-column value -> full CSV row.
124+ private HashMap <String ,Vector <String >> mapVals = new HashMap <>();
// Positions of the configured columns within the lookup CSV header.
// Each is reset to -1 by initMapFile() and stays -1 when the column is not found.
// Header position of the P_MATCH column.
125+ private int i_match = -1 ;
// Header position of the P_DAOID column.
126+ private int i_dao = -1 ;
// Header position of the P_NAME column.
127+ private int i_name = -1 ;
// Header position of the P_LINK column.
128+ private int i_link = -1 ;
// Header position of the P_THUMB column.
129+ private int i_thumb = -1 ;
130+
131+ public void initMapFile () throws IOException {
132+ i_match = -1 ;
133+ i_dao = -1 ;
134+ i_name = -1 ;
135+ i_link = -1 ;
136+ i_thumb = -1 ;
137+ mapVals .clear ();
138+ File f = dcFile .getFile ();
139+ if (f == null ) {
140+ return ;
141+ }
142+ if (!f .exists ()) {
143+ return ;
144+ }
145+ DelimitedFileReader dfr = new DelimitedFileReader (f , "," );
146+ Vector <String > header = dfr .getRow ();
147+ for (int i =0 ; i <header .size (); i ++) {
148+ String s = header .get (i );
149+ if (this .getProperty (P_MATCH ).equals (s )) {
150+ i_match = i ;
151+ }
152+ if (this .getProperty (P_DAOID ).equals (s )) {
153+ i_dao = i ;
154+ }
155+ if (this .getProperty (P_NAME ).equals (s )) {
156+ i_name = i ;
157+ }
158+ if (this .getProperty (P_LINK ).equals (s )) {
159+ i_link = i ;
160+ }
161+ if (this .getProperty (P_THUMB ).equals (s )) {
162+ i_thumb = i ;
163+ }
164+ }
165+ if (i_match == -1 ) {
166+ return ;
167+ }
168+ for (Vector <String >row =dfr .getRow (); row !=null ; row =dfr .getRow ()) {
169+ String key = normalizeKey (row .get (i_match ));
170+ mapVals .put (key , row );
171+ }
172+ }
173+
174+ public String normalizeKey (String s ) {
175+ return s .toLowerCase ()
176+ .replaceAll ("[^a-z0-9]" , " " )
177+ .replaceAll (" +" , " " );
178+ }
179+
180+ public String getMapValue (String key , int col , String def ) {
181+ if (col < 0 ) {
182+ return def ;
183+ }
184+ key = normalizeKey (key );
185+ if (!mapVals .containsKey (key )) {
186+ return def ;
187+ }
188+ return mapVals .get (key ).get (col );
189+ }
190+
80191 public ActionResult importFile (File selectedFile ) throws IOException {
81- details = StatsItemConfig .create (EAD2DCStatsItems .class );
192+ details = StatsItemConfig .create (EAD2DAOStatsItems .class );
193+ EAD_MATCHER matcher = (EAD_MATCHER )getProperty (P_MATCHTYPE );
194+ initMapFile ();
82195 HashMap <String , Object > params = new HashMap <>();
83196 Timer timer = new Timer ();
84197 TreeMap <String , Stats > types = new TreeMap <String , Stats >();
@@ -89,14 +202,22 @@ public ActionResult importFile(File selectedFile) throws IOException {
89202 XMLUtil .doTransform (d , csv , "edu/georgetown/library/fileAnalyzer/ead-dao.xsl" , params );
90203 DelimitedFileReader dfr = new DelimitedFileReader (csv , "," );
91204 Vector <String > header = dfr .getRow ();
92- for (String col : header ) {
93- details .addStatsItem (col , StatsItem .makeStringStatsItem (col ));
94- }
95205 int rownum = 1_000_000 ;
96206 for (Vector <String >row =dfr .getRow (); row !=null ; row =dfr .getRow ()) {
97207 String key = "" +rownum ++;
98208 Stats stats = Generator .INSTANCE .create (key );
99209 types .put (key , stats );
210+ if (row .size () >= 8 ) {
211+ String matchkey = row .get (matcher .index );
212+ stats .setVal (EAD2DAOStatsItems .Field_Name , row .get (0 ));
213+ stats .setVal (EAD2DAOStatsItems .EAD_ID , row .get (1 ));
214+ stats .setVal (EAD2DAOStatsItems .REF_ID , row .get (2 ));
215+ stats .setVal (EAD2DAOStatsItems .DigitalObjectId , getMapValue (matchkey , i_dao , row .get (3 )));
216+ stats .setVal (EAD2DAOStatsItems .DigitalObjectTitle , getMapValue (matchkey , i_name , row .get (4 )));
217+ stats .setVal (EAD2DAOStatsItems .PublishDAO , row .get (5 ));
218+ stats .setVal (EAD2DAOStatsItems .DAOLink , getMapValue (matchkey , i_link , row .get (6 )));
219+ stats .setVal (EAD2DAOStatsItems .DAOThumbnail , getMapValue (matchkey , i_thumb , row .get (7 )));
220+ }
100221 for (int i =0 ; i <header .size (); i ++) {
101222 String s = row .size () > i ? row .get (i ) : "" ;
102223 String col = header .get (i );
0 commit comments