awk — Probability & Combinatorics
Factorial
# Factorial of the integer given as the first field of each stdin line.
# 10! = 3628800
printf '%s\n' 10 | awk '{
  # Running product 2 * 3 * ... * $1; it stays at 1 when $1 is below 2.
  product = 1
  term = 2
  while (term <= $1) {
    product *= term
    term++
  }
  printf "%d! = %.0f\n", $1, product
}'
Permutations P(n,r) — ordered selections
# P(10,3) = 10!/(10-3)! = 720
awk 'BEGIN {
  n = 10
  r = 3
  # Falling product n * (n-1) * ... * (n-r+1), i.e. r factors in total.
  perms = 1
  factor = n
  while (factor > n - r) {
    perms *= factor
    factor--
  }
  printf "P(%d,%d) = %.0f\n", n, r, perms
}'
Combinations C(n,r) — unordered selections
# C(10,3) = 10!/(3!×7!) = 120
awk 'BEGIN {
  n = 10
  r = 3
  # The numerator n*(n-1)*...*(n-r+1) and the denominator r! are accumulated
  # separately and divided once, at print time.
  top = 1
  bottom = 1
  j = 0
  while (++j <= r) {
    top *= (n - j + 1)
    bottom *= j
  }
  printf "C(%d,%d) = %.0f\n", n, r, top / bottom
}'
Password entropy — bits of entropy for a given character set and length
# How strong is a 16-char password from 95 printable ASCII characters?
awk 'BEGIN {
  # "length" is the name of a built-in awk function and cannot be used as a
  # variable, so the password length is kept in "len".
  charset = 95; len = 16
  # bits = len * log2(charset); awk log() is the natural log, hence the
  # division by log(2).
  entropy = len * log(charset) / log(2)
  printf "%d chars from %d symbols = %.1f bits of entropy\n", len, charset, entropy
  # Re-express the exponent in base 10: 2^e = 10^(e * log(2)/log(10)).
  printf "keyspace: 2^%.0f ≈ 10^%.0f\n", entropy, entropy*log(2)/log(10)
}'
Binomial probability — P(X=k) = C(n,k) * p^k * (1-p)^(n-k)
# 10 servers, each has 1% chance of failure. P(exactly 2 fail)?
awk 'BEGIN {
  n = 10; k = 2; p = 0.01
  # C(n,k), built up one factor (n-j+1)/j at a time
  coeff = 1
  j = 1
  while (j <= k) {
    coeff *= (n - j + 1) / j
    j++
  }
  # P(X=k) = C(n,k) * p^k * (1-p)^(n-k)
  prob = coeff * p^k * (1 - p)^(n - k)
  printf "P(X=%d) = %.6f (%.4f%%)\n", k, prob, prob*100
}'
Binomial distribution table — all outcomes
# 5 deployments, 90% success rate each
awk 'BEGIN {
  n = 5; p = 0.9
  running = 0
  printf "%3s %10s %10s\n", "k", "P(X=k)", "P(X<=k)"
  # One row per possible success count k = 0..n.
  k = 0
  while (k <= n) {
    # C(n,k) for this row
    coeff = 1
    for (j = 1; j <= k; j++) { coeff *= (n - j + 1) / j }
    pmf = coeff * p^k * (1 - p)^(n - k)
    # running holds the cumulative probability P(X<=k)
    running += pmf
    printf "%3d %10.6f %10.6f\n", k, pmf, running
    k++
  }
}'
Expected value and variance
# Dice roll simulation — expected value from observed data
awk '
  # Per input line: count it and accumulate the first field and its square.
  { count++; total += $1; squares += $1^2 }
  END {
    mean = total / count
    # Variance as E[X^2] - (E[X])^2, dividing by the full count.
    variance = squares / count - mean^2
    printf "E[X]=%.4f Var(X)=%.4f σ=%.4f\n", mean, variance, sqrt(variance)
    printf "(fair die: E=3.5 Var=2.917)\n"
  }
' <<'EOF'
3
5
2
6
1
4
3
6
2
5
4
1
EOF
Poisson probability — P(X=k) = (lambda^k * e^-lambda) / k!
# Average 3 alerts per hour. P(exactly 5 in an hour)?
awk 'BEGIN {
  lambda = 3; k = 5
  # k! as a running product 2 * 3 * ... * k
  kfact = 1
  j = 2
  while (j <= k) {
    kfact *= j
    j++
  }
  # P(X=k) = lambda^k * e^(-lambda) / k!
  weight = lambda^k * exp(-lambda)
  prob = weight / kfact
  printf "P(X=%d | λ=%d) = %.6f (%.4f%%)\n", k, lambda, prob, prob*100
}'
SLA uptime probability — compound availability
# 3 services in series, each 99.9% uptime
awk 'BEGIN {
  n = 3; p = 0.999
  # A serial chain is available only when every service is, hence p^n.
  combined = p^n
  # 525960 = 365.25 days * 24 hours * 60 minutes
  down_minutes = (1 - combined) * 525960
  printf "%d services @ %.3f%% each\n", n, p*100
  printf "combined availability: %.6f%%\n", combined*100
  printf "expected downtime: %.2f minutes/year\n", down_minutes
}'
Monte Carlo estimation of pi
# Estimate pi from the fraction of random points that land in a quarter circle.
awk 'BEGIN {
  srand()
  n = 100000
  hits = 0
  # Each trial draws a point (x, y) with two rand() calls and counts it when
  # it lies within distance 1 of the origin.
  trial = 1
  while (trial <= n) {
    x = rand(); y = rand()
    hits += (x^2 + y^2 <= 1)
    trial++
  }
  # quarter-circle area / unit-square area = pi/4
  pi_est = 4.0 * hits / n
  # atan2(0,-1) evaluates to pi and serves as the reference value
  printf "π ≈ %.6f (n=%d, error=%.6f)\n", pi_est, n, pi_est-atan2(0,-1)
}'
Bayes' theorem — test accuracy
# Disease prevalence: 1%. Test sensitivity: 95%. Specificity: 90%.
# P(disease | positive test)?
awk 'BEGIN {
  prev = 0.01; sens = 0.95; spec = 0.90
  true_pos = sens * prev                 # P(positive and diseased)
  false_pos = (1 - spec) * (1 - prev)    # P(positive and healthy)
  # Bayes: P(disease|positive) = P(positive and diseased) / P(positive)
  posterior = true_pos / (true_pos + false_pos)
  printf "P(disease|positive) = %.4f (%.2f%%)\n", posterior, posterior*100
  printf "Most positives are false positives when prevalence is low\n"
}'
Birthday problem — P(collision) for n items in m slots
# How many random session IDs before 50% collision chance? (m = 2^32)
awk 'BEGIN {
  m = 2^32
  limit = 100000   # search bound, so the loop always terminates
  p = 1.0          # probability that the first n draws are all distinct
  found = 0
  for (n = 1; n <= limit; n++) {
    # The n-th draw must avoid the n-1 values already taken.
    p *= (m - n + 1) / m
    if (1 - p >= 0.5) {
      # The pool size is printed with %.0f, not %d: 2^32 does not fit in a
      # 32-bit int and some awk implementations clamp %d to that range.
      printf "50%% collision at n=%d (pool=%.0f)\n", n, m
      found = 1
      break
    }
  }
  # Report explicitly when the bound is hit first, rather than staying silent.
  if (!found) printf "no 50%% collision within n<=%d (pool=%.0f)\n", limit, m
  printf "approximation: sqrt(2*m*ln(2)) = %.0f\n", sqrt(2*m*log(2))
}'