#!/usr/bin/perl -T

use 5.14.0;
use strict;
use warnings;
use HTML::Defang qw(:all);
use CGI qw(:standard);
use URI::Escape;

print header();
print qq{<!DOCTYPE html>
<html>
<head>
<title>HTML::Defang Test</title>
</head>
<body>};

# Dump source?
if (param("src")) {
	open (my $fh, "<", "defang.cgi");
	print "<pre>";
	while (<$fh>) {
		print escape($_);
	}
	print "</pre></body></html>";
	exit(0);
}

# Check HTTP referrer to prevent abuse.
my $referer_ok = 0;
if (!$ENV{HTTP_REFERER} || $ENV{HTTP_REFERER} =~ /^(?:https?:\/\/)?(?:www\.)?kirsle\.net/i) {
	$referer_ok = 1;
} else {
	print qq{<h1>Bad HTTP Referrer</h1>

It appears you were sent here from a different website. Be careful when trying to
re-submit the code below!};
}

my $code = param("code") || '';
my $edit = escape($code);

# Only accept code that has been POSTed.
if ($ENV{REQUEST_METHOD} eq "POST" && $referer_ok) {
	# Fix even more mismatched tags (first line is from Defang.pm)
	my @mismatch = qw(table tbody thead tr td th font div span pre center blockquote dl ul ol h1 h2 h3 h4 h5 h6 fieldset tt p noscript a
		b strong i em u ins s del);

	# Defang it.
	my $defang = HTML::Defang->new (
		fix_mismatched_tags => 1,
		mismatched_tags_to_fix => \@mismatch,
		url_callback        => \&DefangUrlCallback,
		css_callback        => \&DefangCssCallback,
	);
	my $sanitized = $defang->defang($code);
	my $escaped   = escape($sanitized);

	print qq{<h1>Your Code (Live)</h1>

$sanitized

<h1>Your Code (Defanged)</h1>

<textarea cols="80" rows="8" readonly>$escaped</textarea>};
}

print qq{<h1>Code Editor</h1>

<form name="edit" method="POST" action="defang.cgi">
Write any old nasty HTML code you want!<br>
<textarea cols="80" rows="8" name="code">$edit</textarea><br>
<input type="submit" value="Submit">
</form>

<p>
Test script for <a href="http://search.cpan.org/perldoc?HTML::Defang" target="_blank">HTML::Defang</a>.
See the <a href="defang.cgi?src=1">source code</a>.

</body>
</html>};

sub DefangUrlCallback {
	my ($context, $defang, $lcTag, $lcAttrKey, $attrVal, $attrHash, $htmlR, $outR) = @_;

	return DEFANG_ALWAYS if $$attrVal =~ /javascript:/;

	$$attrVal = "http://localhost/link.pl?to=" . uri_escape($$attrVal);
}

sub DefangCssCallback {
	my ($context, $defang, $selectors, $selector_rules, $lcTag, $isAttr, $outR) = @_;

	my $i = 0;
	foreach (@$selectors) {
		my $selector_rule = $$selector_rules[$i];
		foreach my $keyValueRules (@{$selector_rule}) {
			foreach my $keyValueRule (@{$keyValueRules}) {
				my ($key, $value) = @$keyValueRule;

				# Sabotage any javascript values.
				$value =~ s/[\x0D\x0A]+//g;
				if ($value =~ /javascript/) {
					$$keyValueRule[1] = '..';
					$$keyValueRule[2] = DEFANG_ALWAYS;
				}
			}
		}
		$i++;
	}
}

sub regex_list {
	my $str = shift;

	my @chars = split(//, $str);
	my $re = "\\n*" . join("", map { "$_\\n*" } @chars);
	return $re;
}

sub escape {
	$_ = shift;
	s/&/&amp;/g;
	s/</&lt;/g;
	s/>/&gt;/g;
	s/"/&quot;/g;
	s/'/&apos;/g;
	return $_;
}

__DATA__

Problems with HTML::Defang:

* It tries to comment out CSS attributes that contain the word "javascript"
  or any variant (including "java\nscript" etc)
  * But this works to get around it:
    <img style="*/background-image: url('javascript:alert(1)')/*">
  * It tries to comment out your code, but your code short circuits the comments