public function get_errno() {
return $this->errno;
}
+
+ /**
+ * When using a proxy, an additional HTTP response code may appear at
+ * the start of the header. For example, when using https over a proxy
+ * there may be 'HTTP/1.0 200 Connection Established'. Other codes are
+ * also possible and some may come with their own headers.
+ *
+ * If using the return value containing all headers, this function can be
+ * called to remove unwanted doubles.
+ *
+ * Note that it is not possible to distinguish this situation from valid
+ * data unless you know the actual response part (below the headers)
+ * will not be included in this string, or else will not 'look like' HTTP
+ * headers. As a result it is not safe to call this function for general
+ * data.
+ *
+ * @param string $input Input HTTP response
+ * @return string HTTP response with additional headers stripped if any
+ */
+ public static function strip_double_headers($input) {
+ // I have tried to make this regular expression as specific as possible
+ // to avoid any case where it does weird stuff if you happen to put
+ // HTTP/1.1 200 at the start of any line in your RSS file. This should
+ // also make it faster because it can abandon regex processing as soon
+ // as it hits something that doesn't look like an http header. The
+ // header definition is taken from RFC 822, except I didn't support
+ // folding which is never used in practice.
+ $crlf = "\r\n";
+ return preg_replace(
+ // HTTP version and status code (ignore value of code).
+ '~^HTTP/1\..*' . $crlf .
+ // Header name: character between 33 and 126 decimal, except colon.
+ // Colon. Header value: any character except \r and \n. CRLF.
+ '(?:[\x21-\x39\x3b-\x7e]+:[^' . $crlf . ']+' . $crlf . ')*' .
+ // Headers are terminated by another CRLF (blank line).
+ $crlf .
+ // Second HTTP status code, this time must be 200.
+ '(HTTP/1.[01] 200 )~', '$1', $input);
+ }
}
/**
$this->assertEquals($contenthash, $fileref->get_contenthash());
$this->assertEquals($filecontent, $fileref->get_content());
}
+
+ /**
+ * Tests the strip_double_headers function in the curl class.
+ */
+ public function test_curl_strip_double_headers() {
+ // Example from issue tracker.
+ $mdl30648example = <<<EOF
+HTTP/1.0 407 Proxy Authentication Required
+Server: squid/2.7.STABLE9
+Date: Thu, 08 Dec 2011 14:44:33 GMT
+Content-Type: text/html
+Content-Length: 1275
+X-Squid-Error: ERR_CACHE_ACCESS_DENIED 0
+Proxy-Authenticate: Basic realm="Squid proxy-caching web server"
+X-Cache: MISS from homer.lancs.ac.uk
+X-Cache-Lookup: NONE from homer.lancs.ac.uk:3128
+Via: 1.0 homer.lancs.ac.uk:3128 (squid/2.7.STABLE9)
+Connection: close
+
+HTTP/1.0 200 OK
+Server: Apache
+X-Lb-Nocache: true
+Cache-Control: private, max-age=15
+ETag: "4d69af5d8ba873ea9192c489e151bd7b"
+Content-Type: text/html
+Date: Thu, 08 Dec 2011 14:44:53 GMT
+Set-Cookie: BBC-UID=c4de2e109c8df6a51de627cee11b214bd4fb6054a030222488317afb31b343360MoodleBot/1.0; expires=Mon, 07-Dec-15 14:44:53 GMT; path=/; domain=bbc.co.uk
+X-Cache-Action: MISS
+X-Cache-Age: 0
+Vary: Cookie,X-Country,X-Ip-is-uk-combined,X-Ip-is-advertise-combined,X-Ip_is_uk_combined,X-Ip_is_advertise_combined, X-GeoIP
+X-Cache: MISS from ww
+
+<html>...
+EOF;
+ $mdl30648expected = <<<EOF
+HTTP/1.0 200 OK
+Server: Apache
+X-Lb-Nocache: true
+Cache-Control: private, max-age=15
+ETag: "4d69af5d8ba873ea9192c489e151bd7b"
+Content-Type: text/html
+Date: Thu, 08 Dec 2011 14:44:53 GMT
+Set-Cookie: BBC-UID=c4de2e109c8df6a51de627cee11b214bd4fb6054a030222488317afb31b343360MoodleBot/1.0; expires=Mon, 07-Dec-15 14:44:53 GMT; path=/; domain=bbc.co.uk
+X-Cache-Action: MISS
+X-Cache-Age: 0
+Vary: Cookie,X-Country,X-Ip-is-uk-combined,X-Ip-is-advertise-combined,X-Ip_is_uk_combined,X-Ip_is_advertise_combined, X-GeoIP
+X-Cache: MISS from ww
+
+<html>...
+EOF;
+ // For HTTP, replace the \n with \r\n.
+ $mdl30648example = preg_replace("~(?!<\r)\n~", "\r\n", $mdl30648example);
+ $mdl30648expected = preg_replace("~(?!<\r)\n~", "\r\n", $mdl30648expected);
+
+ // Test stripping works OK.
+ $this->assertEquals($mdl30648expected, curl::strip_double_headers($mdl30648example));
+ // Test it does nothing to the 'plain' data.
+ $this->assertEquals($mdl30648expected, curl::strip_double_headers($mdl30648expected));
+
+ // Example from OU proxy.
+ $httpsexample = <<<EOF
+HTTP/1.0 200 Connection established
+
+HTTP/1.1 200 OK
+Date: Fri, 22 Feb 2013 17:14:23 GMT
+Server: Apache/2
+X-Powered-By: PHP/5.3.3-7+squeeze14
+Content-Type: text/xml
+Connection: close
+Content-Encoding: gzip
+Transfer-Encoding: chunked
+
+<?xml version="1.0" encoding="ISO-8859-1" ?>
+<rss version="2.0">...
+EOF;
+ $httpsexpected = <<<EOF
+HTTP/1.1 200 OK
+Date: Fri, 22 Feb 2013 17:14:23 GMT
+Server: Apache/2
+X-Powered-By: PHP/5.3.3-7+squeeze14
+Content-Type: text/xml
+Connection: close
+Content-Encoding: gzip
+Transfer-Encoding: chunked
+
+<?xml version="1.0" encoding="ISO-8859-1" ?>
+<rss version="2.0">...
+EOF;
+ // For HTTP, replace the \n with \r\n.
+ $httpsexample = preg_replace("~(?!<\r)\n~", "\r\n", $httpsexample);
+ $httpsexpected = preg_replace("~(?!<\r)\n~", "\r\n", $httpsexpected);
+
+ // Test stripping works OK.
+ $this->assertEquals($httpsexpected, curl::strip_double_headers($httpsexample));
+ // Test it does nothing to the 'plain' data.
+ $this->assertEquals($httpsexpected, curl::strip_double_headers($httpsexpected));
+ }
}