diff --git a/urltitle.tcl b/urltitle.tcl index a375f59..2571c62 100644 --- a/urltitle.tcl +++ b/urltitle.tcl @@ -48,7 +48,7 @@ namespace eval UrlTitle { variable fetchLimit 5 ;# How many times to process redirects before erroring # BINDS - bind pubm "-|-" {*://*} UrlTitle::handler + bind pubm "-|-" {*} UrlTitle::handler setudef flag urltitle ;# Channel flag to enable script. setudef flag logurltitle ;# Channel flag to enable logging of script. @@ -102,8 +102,7 @@ namespace eval UrlTitle { set unixtime [clock seconds] if {[channel get $chan urltitle] && ($unixtime - $delay) > $last && (![matchattr $user $ignore])} { foreach word [split $text] { - if {[string length $word] >= $length && [regexp {^(f|ht)tp(s|)://} $word] && \ - ![regexp {://([^/:]*:([^/]*@|\d+(/|$))|.*/\.)} $word]} { + if {[string length $word] >= $length && [regexp {((?:[a-zA-Z][\w-]+:(?:\/{1,3}|[a-zA-Z0-9%])|www\d{0,3}[.]|[a-zA-Z0-9\-]+[.][a-zA-Z]{2,4}\/?)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\)){0,}(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s\!()\[\]{};:\'\"\.\,<>?«»“”‘’]){0,})} $word]} { set last $unixtime # enable https if supported if {$httpsSupport} { @@ -112,7 +111,11 @@ namespace eval UrlTitle { set urtitle [UrlTitle::parse $word] if {$htmlSupport} { set urtitle [::htmlparse::mapEscapes $urtitle] + } else { + # Fallback to a simple decoder if htmlparse not installed + set urtitle [simpleHtmlDecode $urtitle] } + # unregister https if supported if {$httpsSupport} { ::http::unregister https @@ -121,14 +124,13 @@ namespace eval UrlTitle { break } if {[string length $urtitle]} { - puthelp "PRIVMSG $chan :Title: $urtitle" + puthelp "PRIVMSG $chan :\002$urtitle" } - break } } } # change to return 0 if you want the pubm trigger logged additionally.. - return 1 + return 0 } # General HTTP redirect handler @@ -182,7 +184,18 @@ namespace eval UrlTitle { variable timeout variable tdomSupport set title "" + if {[info exists url] && [string length $url]} { + if { + ([string first "http://" $url] == -1) && + ([string first "https://" $url] == -1) + } { + set url "http://$url" + } + + ## Some websites will display a title if an image is passed without an extension. + regsub -nocase {(\.png|\.gif|.jpeg|\.jpg)\Z} $url {} url + if {[catch {set http [Fetch $url -timeout $timeout]} results]} { putlog "Connection to $url failed" putlog "Error: $results" @@ -226,6 +239,399 @@ namespace eval UrlTitle { return $title } + # Simple html decoder if htmlparse is not available + proc simpleHtmlDecode {text} { + set title "" + set html_mapping { + ‘ ' + ’ ' + ’ ' + – ' + ' ' + " " + " " + ‚ ‚ + “ “ + ” ” + „ „ + † † + ‡ ‡ + ‰ ‰ + ‹ ‹ + › › + ♠ ♠ + ♣ ♣ + ♥ ♥ + ♦ ♦ + ‾ ‾ + ← ← + ← ← + ↑ ↑ + ↑ ↑ + → → + → → + ↓ ↓ + ↓ ↓ + ↖ ↖ + ↖ ↖ + ↗ ↗ + ↗ ↗ + ↙ ↙ + ↙ ↙ + ↘ ↘ + ↘ ↘ + ▲ ▲ + ▲ ▲ + ▴ ▴ + ▴ ▴ + ▶ ▶ + ▶ ▶ + ▸ ▸ + ▸ ▸ + ► ► + ► ► + ▼ ▼ + ▼ ▼ + ▾ ▾ + ▾ ▾ + ◀ ◀ + ◀ ◀ + ◂ ◂ + ◂ ◂ + ◄ ◄ + ◄ ◄ + ™ ™ + ' ' + ™ ™ + � - + � - + ! ! + ! ! + " {"} + " {"} + " {"} + # {#} + # {#} + $ $ + $ $ + % % + % % + & & + & & + & & + ' ' + ' ' + ( ( + ( ( + ) ) + ) ) + * * + * * + + + + + + + , , + , , + - - + - - + . . + . . + / / + / / + ⁄ / + 0 - + 0 - + : : + : : + ; ; + ; ; + < < + < < + < < + = = + = = + > > + > > + > > + ? ? + ? ? + @ @ + @ @ + A - + A - + [ [ + [ [ + \ \ + \ \ + ] ] + ] ] + ^ ^ + ^ ^ + _ _ + _ _ + ` ` + ` ` + a - + a - + { { + | | + } } + ~ ~ + … … + … … + – – + – – + — — + — — + ˜ - + Ÿ " " +   " " + ¡ ¡ + ¡ ¡ + ¢ ¢ + ¢ ¢ + £ £ + £ £ + ¤ ¤ + ¤ ¤ + ¥ ¥ + ¥ ¥ + ¦ ¦ + ¦ ¦ + &brkbar; ¦ + § § + § § + ¨ ¨ + ¨ ¨ + ¨ ¨ + © © + © © + ª ª + ª ª + « « + « « + ¬ ¬ + ¬ ¬ + ® ® + ® ® + ¯ ¯ + ¯ ¯ + &hibar; ¯ + ° ° + ° ° + ± ± + ± ± + ² ² + ² ² + ³ ³ + ³ ³ + ´ ´ + ´ ´ + µ µ + µ µ + ¶ ¶ + ¶ ¶ + · · + · · + ¸ ¸ + ¸ ¸ + ¹ ¹ + ¹ ¹ + º º + º º + » » + » » + ¼ ¼ + ¼ ¼ + ½ ½ + ½ ½ + ¾ ¾ + ¾ ¾ + ¿ ¿ + ¿ ¿ + À À + À À + Á Á + Á Á + Â Â + Â Â + Ã Ã + Ã Ã + Ä Ä + Ä Ä + Å Å + Å Å + Æ Æ + Æ Æ + Ç Ç + Ç Ç + È È + È È + É É + É É + Ê Ê + Ê Ê + Ë Ë + Ë Ë + Ì Ì + Ì Ì + Í Í + Í Í + Î Î + Î Î + Ï Ï + Ï Ï + Ð Ð + Ð Ð + Ñ Ñ + Ñ Ñ + Ò Ò + Ò Ò + Ó Ó + Ó Ó + Ô Ô + Ô Ô + Õ Õ + Õ Õ + Ö Ö + Ö Ö + × × + × × + Ø Ø + Ø Ø + Ù Ù + Ù Ù + Ú Ú + Ú Ú + Û Û + Û Û + Ü Ü + Ü Ü + Ý Ý + Ý Ý + Þ Þ + Þ Þ + ß ß + ß ß + à à + à à + á á + á á + â â + â â + ã ã + ã ã + ä ä + ä ä + å å + å å + æ æ + æ æ + ç ç + ç ç + è è + è è + é é + é é + ê ê + ê ê + ë ë + ë ë + ì ì + ì ì + í í + í í + î î + î î + ï ï + ï ï + ð ð + ð ð + ñ ñ + ñ ñ + ò ò + ò ò + ó ó + ó ó + ô ô + ô ô + õ õ + õ õ + ö ö + ö ö + ÷ ÷ + ÷ ÷ + ø ø + ø ø + ù ù + ù ù + ú ú + ú ú + û û + û û + ü ü + ü ü + ý ý + ý ý + þ þ + þ þ + ÿ ÿ + ÿ ÿ + Α Α + α α + Β Β + β β + Γ Γ + γ γ + Δ Δ + δ δ + Ε Ε + ε ε + Ζ Ζ + ζ ζ + Η Η + η η + Θ Θ + θ θ + Ι Ι + ι ι + Κ Κ + κ κ + Λ Λ + λ λ + Μ Μ + μ μ + Ν Ν + ν ν + Ξ Ξ + ξ ξ + Ο Ο + ο ο + Π Π + π π + Ρ Ρ + ρ ρ + Σ Σ + σ σ + Τ Τ + τ τ + Υ Υ + υ υ + Φ Φ + φ φ + Χ Χ + χ χ + Ψ Ψ + ψ ψ + Ω Ω + ω ω + ● ● + • • + ∞ ∞ + ∞ ∞ + } + set title [string map $html_mapping $text] + return $title + } putlog "Initialized Url Title Grabber v$scriptVersion" }