Skip to content

Commit 59454ff

Browse files
committed
• SSYWeblocGuy has a new factored-out method to support parsing one file at a time, which is needed to preserve hierarchy when parsing folders of .webloc files.
• SSYWeblocGuy now does proper parsing of XML instead of cheesily grabbing the last string.
1 parent d1dc63a commit 59454ff

File tree

2 files changed

+122
-59
lines changed

2 files changed

+122
-59
lines changed

SSYWeblocGuy.h

+18-13
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,22 @@
33

44
@interface SSYWeblocGuy : NSObject <NSXMLParserDelegate> {
55
NSMutableString* m_xmlString ;
6-
BOOL m_accumulatingUrl ;
76
}
87

98
/*!
10-
@brief Returns an array of dictionaries, each containing the filename and
11-
URL extracted from any .webloc file found in a given array of file paths
9+
@brief Returns a dictionary containing the URL and name represented by
10+
a .webloc file at a given path
1211
13-
@details Each element in the result is a dictionary which contains objects
14-
for two keys, "url" which is the url extracted from the .webloc file, and
15-
"filename" which is the base name of the .webloc file, without the path
16-
and without the .webloc extension.
12+
@details The returned dictionary contains objects for two keys, "url" which
13+
is the url extracted from the .webloc file, and "filename" which is the base
14+
name of the .webloc file, without the path and without the .webloc extension.
1715
16+
If the given file's name does not have the .webloc extension, or does not
17+
contain XML containing a URL as expected for a .webloc file, returns nil.
18+
1819
This method extracts the URL from XML in the data fork which is assumed to
1920
look like this example.
20-
21+
2122
<?xml version="1.0" encoding="UTF-8"?>
2223
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
2324
<plist version="1.0">
@@ -26,16 +27,20 @@
2627
<string>http://ancienthistory.about.com/od/cityofrome/ss/7hillsofRome.htm</string>
2728
</dict>
2829
</plist>
29-
30+
3031
It simply parses whatever is in the last <string>. TODO: Do a proper parsing,
3132
looking for the <string> element of the <dict> element of the <plist> element.
32-
33+
3334
Although ancient versions of Safari created .webloc files with the target URL
3435
in the resource fork instead of the data fork, this method no longer supports
35-
that.
36-
*/
37-
36+
that. */
37+
+ (NSDictionary*)filenameAndUrlFromWeblocFileAtPath:(NSString*)path ;
3838

39+
/*!
40+
@brief Returns an array of dictionaries, each containing the filename and
41+
URL extracted from any .webloc file found in a given array of file paths by
42+
-filenameAndUrlFromWeblocFileAtPath:.
43+
*/
3944
+ (NSArray*)weblocFilenamesAndUrlsInPaths:(NSArray*)paths ;
4045

4146
@end

SSYWeblocGuy.m

+104-46
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,41 @@
11
#import "SSYWeblocGuy.h"
22

3+
/* Prior to 2016-08-10, this class did not have a proper state machine for
4+
parsing the XML expected in a .webloc file. I just took the last string that
5+
was parsed. This worked because there are only two strings in the file:
6+
<key>URL</key>
7+
<string>http://www.example.com</string>
8+
and the second one was the one I wanted. And so I marked a TODO to fix
9+
this someday.
10+
11+
Now, it is fixed, but I'm not sure if it is better or worse. The goal is to
12+
future-proof in case Apple changes the .webloc format. But after looking at
13+
this state machine, I'm not sure it's really any more future proof. It is
14+
definitely less cheesy. Oh, well, the old code is in git if I want to go back.
15+
*/
16+
17+
typedef enum {
18+
SSYWeblocParserStateZero,
19+
SSYWeblocParserStateInPlist,
20+
SSYWeblocParserStateInDict,
21+
SSYWeblocParserStateParsingAKey,
22+
SSYWeblocParserStateParsingUrlString,
23+
SSYWeblocParserStateGotAllWeNeed
24+
} SSYWeblocParserState ;
25+
26+
327
@interface SSYWeblocGuy ()
428

5-
@property (retain) NSMutableString* xmlString ;
29+
@property (retain) NSMutableString* stringBeingParsed ;
30+
@property (assign) SSYWeblocParserState parserState ;
631

732
@end
833

934

1035
@implementation SSYWeblocGuy
1136

12-
@synthesize xmlString = m_xmlString ;
13-
1437
- (void)dealloc {
15-
[m_xmlString release] ;
38+
[_stringBeingParsed release] ;
1639

1740
[super dealloc] ;
1841
}
@@ -22,29 +45,41 @@ - (void) parser:(NSXMLParser*)parser
2245
namespaceURI:(NSString*)namespaceURI
2346
qualifiedName:(NSString*)qualifiedName
2447
attributes:(NSDictionary*)attributeDict {
25-
if ([elementName isEqualToString:@"string"]) {
26-
// The contents are collected in parser:foundCharacters:.
27-
m_accumulatingUrl = YES ;
28-
// The mutable string needs to be reset to empty.
29-
[[self xmlString] setString:@""] ;
30-
}
48+
if ((_parserState == SSYWeblocParserStateZero) && [elementName isEqualToString:@"plist"]) {
49+
_parserState = SSYWeblocParserStateInPlist ;
50+
}
51+
else if ((_parserState == SSYWeblocParserStateInPlist) && [elementName isEqualToString:@"dict"]) {
52+
_parserState = SSYWeblocParserStateInDict ;
53+
}
54+
else if ((_parserState == SSYWeblocParserStateInDict) && [elementName isEqualToString:@"key"]) {
55+
_parserState = SSYWeblocParserStateParsingAKey ;
56+
}
57+
else if ((_parserState == SSYWeblocParserStateParsingUrlString) && [elementName isEqualToString:@"string"]) {
58+
if ([self.stringBeingParsed isEqualToString:@"URL"]) {
59+
_parserState = SSYWeblocParserStateParsingUrlString ;
60+
[self.stringBeingParsed setString:@""] ;
61+
}
62+
}
3163
}
3264

3365
- (void)parser:(NSXMLParser *)parser
3466
didEndElement:(NSString *)elementName
3567
namespaceURI:(NSString *)namespaceURI
3668
qualifiedName:(NSString *)qName {
37-
m_accumulatingUrl = NO ;
69+
if ((_parserState == SSYWeblocParserStateParsingAKey) && [elementName isEqualToString:@"key"]) {
70+
_parserState = SSYWeblocParserStateParsingUrlString ;
71+
}
72+
if ((_parserState == SSYWeblocParserStateParsingUrlString) && [elementName isEqualToString:@"string"]) {
73+
_parserState = SSYWeblocParserStateGotAllWeNeed ;
74+
}
3875
}
3976

4077
- (void) parser:(NSXMLParser*)parser
4178
foundCharacters:(NSString*)string {
42-
43-
if (m_accumulatingUrl) {
44-
// If the current element is one whose content we care about, append 'string'
45-
// to the property that holds the content of the current element.
46-
//
47-
[[self xmlString] appendString:string] ;
79+
if ((_parserState ==SSYWeblocParserStateParsingAKey) || (_parserState == SSYWeblocParserStateParsingUrlString)) {
80+
/* I have not investigated why this method gets invoked with
81+
string = @"\n" in between the real strings. I just filter them out… */
82+
[[self stringBeingParsed] appendString:[string stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]]] ;
4883
}
4984
}
5085

@@ -53,50 +88,73 @@ - (void) parser:(NSXMLParser*)parser
5388
NSLog(@"Found error in XML: %@", error) ;
5489
}
5590

56-
- (NSArray*)weblocFilenamesAndUrlsInPaths:(NSArray*)paths {
57-
NSMutableArray* filenamesAndURLs = [NSMutableArray array] ;
58-
59-
for (NSString* path in paths) {
60-
NSString* url = nil ;
61-
91+
- (NSDictionary*)filenameAndUrlFromWeblocFileAtPath:(NSString*)path {
92+
NSString* url = nil ;
93+
if ([path.pathExtension isEqualToString:@"webloc"]) {
6294
NSData* data = [NSData dataWithContentsOfFile:path] ;
95+
96+
/* The .webloc files produced by Safari 9.0 (macOS 10.11) or later
97+
are .plist files. Look for that first. */
6398
NSDictionary* dic = [NSPropertyListSerialization propertyListWithData:data
6499
options:0
65100
format:NULL
66101
error:NULL] ;
67102
url = [dic objectForKey:@"URL"] ;
103+
68104
if (!url) {
69-
if (data) {
105+
/* We are parsing a .webloc file produced by Safari 5 - 8, which
106+
uses XML format. */
107+
if (data.length > 0) {
70108
NSXMLParser* parser = [[NSXMLParser alloc] initWithData:data] ;
71109
[parser setDelegate:self] ;
72-
NSMutableString* xmlString = [[NSMutableString alloc] init] ;
73-
[self setXmlString:xmlString] ;
74-
[xmlString release] ;
75-
110+
self.parserState = SSYWeblocParserStateZero ;
111+
NSMutableString* stringBeingParsed = [NSMutableString new] ;
112+
self.stringBeingParsed = stringBeingParsed ;
113+
[stringBeingParsed release] ;
76114
[parser parse] ;
77-
// Note that -parse is synchronous and will not return until the parsing
78-
// is done or aborted.
115+
/* Note: -parse is synchronous and will not return until the parsing
116+
is done or aborted. */
79117
[parser release] ;
80118

81-
url = [self xmlString] ;
82-
83-
// Not really necessary, but for resource usage efficiency we
84-
// release xmlString here instead of in -dealloc…
85-
[self setXmlString:nil] ;
119+
url = [self.stringBeingParsed copy] ;
120+
[url autorelease] ;
121+
self.stringBeingParsed = nil ;
86122
}
87123
}
88-
89-
if (url) {
90-
NSString* filename = [[path lastPathComponent] stringByDeletingPathExtension] ;
91-
92-
NSDictionary* filenameAndURL = [NSDictionary dictionaryWithObjectsAndKeys:
93-
filename, @"filename",
94-
url, @"url",
95-
nil] ;
96-
97-
[filenamesAndURLs addObject:filenameAndURL] ;
124+
}
125+
126+
NSDictionary* answer ;
127+
if (url.length > 0) {
128+
NSString* filename = [[path lastPathComponent] stringByDeletingPathExtension] ;
129+
answer = [NSDictionary dictionaryWithObjectsAndKeys:
130+
[filename stringByDeletingPathExtension], @"filename",
131+
url, @"url",
132+
nil] ;
133+
}
134+
else {
135+
answer = nil ;
136+
}
137+
138+
return answer ;
139+
}
140+
141+
+ (NSDictionary*)filenameAndUrlFromWeblocFileAtPath:(NSString*)path {
142+
SSYWeblocGuy* instance = [[SSYWeblocGuy alloc] init] ;
143+
NSDictionary* answer = [instance filenameAndUrlFromWeblocFileAtPath:path] ;
144+
[instance release] ;
145+
146+
return answer ;
147+
}
148+
149+
- (NSArray*)weblocFilenamesAndUrlsInPaths:(NSArray*)paths {
150+
NSMutableArray* filenamesAndURLs = [NSMutableArray array] ;
151+
152+
for (NSString* path in paths) {
153+
NSDictionary* filenameAndUrl = [self filenameAndUrlFromWeblocFileAtPath:path] ;
154+
if (filenameAndUrl) {
155+
[filenamesAndURLs addObject:filenameAndUrl] ;
98156
}
99-
}
157+
}
100158

101159
if ([filenamesAndURLs count])
102160
return [[[NSArray alloc] initWithArray:filenamesAndURLs] autorelease] ;

0 commit comments

Comments
 (0)