Skip to content

Commit 485f2cb

Browse files
alrra and mathiasbynens
authored and committed
.functions: Improve httpcompression and move it to its own file
Closes mathiasbynens#162.
1 parent b63c761 commit 485f2cb

File tree

2 files changed

+127
-6
lines changed

2 files changed

+127
-6
lines changed

.functions

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,12 +112,6 @@ function gz() {
112112
printf "gzip: %d bytes (%2.2f%%)\n" "$gzipsize" "$ratio"
113113
}
114114

115-
# Test if HTTP compression (RFC 2616 + SDCH) is enabled for a given URL.
# Send a fake UA string for sites that sniff it instead of using the Accept-Encoding header. (Looking at you, ajax.googleapis.com!)
function httpcompression() {
	# Declare and assign separately: `local var="$(cmd)"` always returns the
	# exit status of `local` itself (0), so an `&&`/`||` chained on it could
	# never take the failure path. Branch on the captured value instead.
	local encoding
	encoding="$(curl -LIs -H 'User-Agent: Mozilla/5 Gecko' -H 'Accept-Encoding: gzip,deflate,compress,sdch' "$1" | grep '^Content-Encoding:')"
	if [ -n "$encoding" ]; then
		echo "$1 is encoded using ${encoding#* }"
	else
		echo "$1 is not using any encoding"
	fi
}
120-
121115
# Syntax-highlight JSON strings or files
122116
# Usage: `json '{"foo":42}'` or `echo '{"foo":42}' | json`
123117
function json() {

bin/httpcompression

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
#!/usr/bin/env bash

# Test if HTTP compression (RFC 2616 + SDCH) is enabled for a given URL

declare -r hUA="Mozilla/5.0 Gecko"
declare -r hAE="Accept-Encoding: gzip, deflate, sdch"
declare -r maxConTime=15
declare -r maxTime=30

declare availDicts="" dict="" dictClientID="" dicts="" headers="" i="" \
        indent="" url="" encoding="" urlHeaders=""

# Fetch the response headers only:
#   -A  send a fake UA string for sites that sniff it instead of using
#       the Accept-Encoding header
#   -D  dump the response headers to stdout
#   -L  if the page was moved to a different location, redo the request
#   -s  don't show the progress meter; -S still show error messages
#   -o  ignore the content
headers="$( curl --connect-timeout $maxConTime \
                 -A "$hUA" \
                 -D - \
                 -H "$hAE" \
                 -L \
                 -m $maxTime \
                 -s \
                 -S \
                 -o /dev/null \
                 "$1" )" \
&& ( \

    url="$1"

    # Iterate over the headers of all redirects
    while [ -n "$headers" ]; do

        # Get headers for the "current" URL
        urlHeaders="$( printf "%s" "$headers" |
                       sed -n '1,/^HTTP/p' )"

        # Remove the headers for the "current" URL
        headers="${headers/"$urlHeaders"/}"

        # ----------------------------------------------------------------------
        # | SDCH                                                                |
        # ----------------------------------------------------------------------

        # SDCH Specification:
        #  - www.blogs.zeenor.com/wp-content/uploads/2011/01/Shared_Dictionary_Compression_over_HTTP.pdf

        # Check if the server advertised any dictionaries
        dicts="$( printf "%s" "$urlHeaders" |
                  grep -i 'Get-Dictionary:' |
                  cut -d':' -f2 |
                  sed 's/,/ /g' )"

        if [ -n "$dicts" ]; then

            availDicts=""
            dict=""

            for i in $dicts; do

                # If the dictionary location is specified as a path,
                # construct its URL from the host name of the referrer URL
                [[ "$i" != http* ]] \
                    && dict="$( printf "%s" "$url" |
                                sed -En 's/([^/]*\/\/)?([^/]*)\/?.*/\1\2/p' )"

                dict="$dict$i"

                # Request the dictionaries from the server and
                # construct the `Avail-Dictionary` header value
                #
                # [ The user agent identifier for a dictionary is defined
                #   as the URL-safe base64 encoding (as described in RFC
                #   3548, section 4 [RFC3548]) of the first 48 bits (bits
                #   0..47) of the dictionary's SHA-256 digest ]
                #
                dictClientID="$( curl --connect-timeout $maxConTime \
                                      -A "$hUA" -LsS -m $maxTime "$dict" |
                                 openssl dgst -sha256 -binary |
                                 openssl base64 |
                                 cut -c 1-8 |
                                 sed -e 's/\+/-/' -e 's/\//_/' )"

                # Append to the comma-separated list (quote the test so it
                # actually checks emptiness; the original also referenced a
                # misspelled variable `$adics` here)
                [ -n "$availDicts" ] && availDicts="$availDicts,$dictClientID" \
                                     || availDicts="$dictClientID"

            done

            # Redo the request (advertising the available dictionaries)
            # and replace the old resulted headers with the new ones
            urlHeaders="$( curl --connect-timeout $maxConTime \
                                -A "$hUA" -D - -H "$hAE" \
                                -H "Avail-Dictionary: $availDicts" \
                                -m $maxTime -o /dev/null -sS "$1" )"

        fi

        # ----------------------------------------------------------------------

        # Get the content encoding header values
        encoding="$( printf "%s" "$urlHeaders" |
                     grep -i 'Content-Encoding:' |
                     cut -d' ' -f2 |
                     tr "\r" "," |
                     tr -d "\n" |
                     sed 's/,$//' )"

        [ -n "$encoding" ] && encoding="[$encoding]"

        # Print the output for the "current" URL
        if [ "$url" != "$1" ]; then
            printf "%s\n" "$indent$url $encoding"
            indent=" $indent"
        else
            printf "\n%s\n" " $1 $encoding"
            indent=""
        fi

        # Get the next URL value
        url="$( printf "%s" "$urlHeaders" |
                grep -i 'Location' |
                sed -e 's/Location://' |
                tr -d '\r' )"

    done
    printf "\n"

) || printf ""

0 commit comments

Comments
 (0)