#!/usr/bin/perl -T use 5.14.0; use strict; use warnings; use HTML::Defang qw(:all); use CGI qw(:standard); use URI::Escape; print header(); print qq{<!DOCTYPE html> <html> <head> <title>HTML::Defang Test</title> </head> <body>}; # Dump source? if (param("src")) { open (my $fh, "<", "defang.cgi"); print "<pre>"; while (<$fh>) { print escape($_); } print "</pre></body></html>"; exit(0); } # Check HTTP referrer to prevent abuse. my $referer_ok = 0; if (!$ENV{HTTP_REFERER} || $ENV{HTTP_REFERER} =~ /^(?:https?:\/\/)?(?:www\.)?kirsle\.net/i) { $referer_ok = 1; } else { print qq{<h1>Bad HTTP Referrer</h1> It appears you were sent here from a different website. Be careful when trying to re-submit the code below!}; } my $code = param("code") || ''; my $edit = escape($code); # Only accept code that has been POSTed. if ($ENV{REQUEST_METHOD} eq "POST" && $referer_ok) { # Fix even more mismatched tags (first line is from Defang.pm) my @mismatch = qw(table tbody thead tr td th font div span pre center blockquote dl ul ol h1 h2 h3 h4 h5 h6 fieldset tt p noscript a b strong i em u ins s del); # Defang it. my $defang = HTML::Defang->new ( fix_mismatched_tags => 1, mismatched_tags_to_fix => \@mismatch, url_callback => \&DefangUrlCallback, css_callback => \&DefangCssCallback, ); my $sanitized = $defang->defang($code); my $escaped = escape($sanitized); print qq{<h1>Your Code (Live)</h1> $sanitized <h1>Your Code (Defanged)</h1> <textarea cols="80" rows="8" readonly>$escaped</textarea>}; } print qq{<h1>Code Editor</h1> <form name="edit" method="POST" action="defang.cgi"> Write any old nasty HTML code you want!<br> <textarea cols="80" rows="8" name="code">$edit</textarea><br> <input type="submit" value="Submit"> </form> <p> Test script for <a href="http://search.cpan.org/perldoc?HTML::Defang" target="_blank">HTML::Defang</a>. See the <a href="defang.cgi?src=1">source code</a>. </body> </html>}; sub DefangUrlCallback { my ($context, $defang, $lcTag, $lcAttrKey, $attrVal, $attrHash, $htmlR, $outR) = @_; return DEFANG_ALWAYS if $$attrVal =~ /javascript:/; $$attrVal = "http://localhost/link.pl?to=" . uri_escape($$attrVal); } sub DefangCssCallback { my ($context, $defang, $selectors, $selector_rules, $lcTag, $isAttr, $outR) = @_; my $i = 0; foreach (@$selectors) { my $selector_rule = $$selector_rules[$i]; foreach my $keyValueRules (@{$selector_rule}) { foreach my $keyValueRule (@{$keyValueRules}) { my ($key, $value) = @$keyValueRule; # Sabotage any javascript values. $value =~ s/[\x0D\x0A]+//g; if ($value =~ /javascript/) { $$keyValueRule[1] = '..'; $$keyValueRule[2] = DEFANG_ALWAYS; } } } $i++; } } sub regex_list { my $str = shift; my @chars = split(//, $str); my $re = "\\n*" . join("", map { "$_\\n*" } @chars); return $re; } sub escape { $_ = shift; s/&/&/g; s/</</g; s/>/>/g; s/"/"/g; s/'/'/g; return $_; } __DATA__ Problems with HTML::Defang: * It tries to comment out CSS attributes that contain the word "javascript" or any variant (including "java\nscript" etc) * But this works to get around it: <img style="*/background-image: url('javascript:alert(1)')/*"> * It tries to comment out your code, but your code short circuits the comments