Week 3 Lecture 2b:More on sorting¶

%run "boaz_utils.ipynb"

Time complexity review¶

Algorithm: A receipe for instructions how to compute output from input

Program: Coding up the algorithm in a programming language

Running Time: Depends on algorithm, language, computer

If input is longer program is slower.

O notation: By how much slower? Ignore programming language and computer, focus on the algorithm

def f(L):
    x = 0
    for i in range(len(L)):
        if L[i]> 100:
            x += L[i]
    return x

timer(f,genintlist(2000));

def g(L):
    x = 0
    for i in range(len(L)):
        for j in range(len(L)):
            x += L[i]-L[j]
    return x
g([243,45,90,23,54,758])

0

timer(g,genintlist(300));

compare_times(f,g,genintlist(300));

def h(L):
    x = 0
    for i in range(len(L)):
        x += L[i]*L[i]*L[i]
        
    for j in range(len(L)):
        x -= L[j]*L[j]*L[j]
        
    for k in range(len(L)):
        x += L[k]*L[k]*L[k]
    return x

timer(h,genintlist(500));

compare_times(f,h,genintlist(1000))

MergeSort algorithm¶

Recall: MergeSort splits the input list into two equal sized halves, recursively sorts each one, then merges the two sorted lists together.

def merge(A, B):
    C = []
    i = j = 0
    while i<len(A) and j<len(B):
        if A[i] < B[j]:
            C.append(A[i])
            i += 1
        else:
            C.append(B[j])
            j += 1
    C += A[i:] + B[j:]
    return C

def merge_sort(L):
    if len(L) <= 1:
        return L[:]
    else:
        A = L[0:len(L)//2]
        B = L[len(L)//2:]
        return merge(merge_sort(A), merge_sort(B))

print(merge([1,2,5,20], [6,8,11,13]))

[1, 2, 5, 6, 8, 11, 13, 20]

L = list(range(10,0,-1))
print(L)
print(merge_sort(L))

[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

SelectionSort¶

This algorithm does the following on a list of size $n$:

Find minimum element and put it at the beginning.
Recursively sort the last $n-1$ elements.

#This function returns the index of the minimum element in list L
def find_min_index(L):
    current_index = 0
    current_min = L[0]
    for j in range(1,len(L)):
        if current_min > L[j]:
            current_min = L[j]
            current_index = j
    return current_index

def selectionsort(L):
    if len(L)<=1:   
        return L # a one-element list is always sorted
    min_idx = find_min_index(L) #non-recursive helper function
    L[0], L[min_idx] = L[min_idx], L[0] 
    return [L[0]] + selectionsort(L[1:len(L)])

compare_times(selectionsort,mergesort, genintlist(300))

n = 16
selection = [ "*"*(n-i) for i in range(n)]
mergestars = []

def print_stars(n):
    global mergestars
    mergestars.append("*"*n)
    if n <= 1: 
        return
    print_stars(n//2)
    print_stars(n//2)

print_stars(n)
def selection_vs_merge_stars():
    global mergestars,n,selection
    for i in range(max(len(selection),len(mergestars))):
        s = ""
        if i < len(selection):
            s= selection[i]
        s += " "*(10+n-len(s))
        if i < len(mergestars):
            s += mergestars[i]
        print(s)

selection_vs_merge_stars()

****************          ****************
***************           ********
**************            ****
*************             **
************              *
***********               *
**********                **
*********                 *
********                  *
*******                   ****
******                    **
*****                     *
****                      *
***                       **
**                        *
*                         *
                          ********
                          ****
                          **
                          *
                          *
                          **
                          *
                          *
                          ****
                          **
                          *
                          *
                          **
                          *
                          *

Analysis of running time¶

timer(mergesort,genintlist(1000));

If merging two lists of $n/2$ elements takes $n$ steps, time to sort $n$ elements is

$T(n) = 2T(n/2) + n$

def T(n):
    if n <=1: return 1
    return 2*T(int(n/2))+n

plotfunctions(T,"T",lambda n: n, "$n$", lambda n: n*log(n), r"$n\log n$", n = 500)

Claim: For $n\geq 2$, $T(n) \leq 2 n\log n$.
Proof: By induction
$T(2) = 2T(1)+2 = 4 = 2\cdot 2 \cdot \log 2$
$T(n) = 2T(n/2) + n$
$\leq 2\cdot 2(n/2)\log(n/2) + n$
$= 2n(\log n - 1) + n \leq 2n\log n$

QuickSort¶

Algorithm Quicksort: Input L of length n.

Pick random j in [0,...,n-1]
Let x=L[j] and reorder L so that locations L[0],..,L[i-1] are smaller than x and locations L[i],...,L[n-1] are at least as large as x.
Recursively sort L[0],...,L[i-1] and L[i+1],...,L[n-1]

def quicksort(L, beg=0, end=None):
    if end==None: end = len(L)-1
    if end <= beg: return
    x = L[random.randint(beg,end)]
    i,j = partition(L,x,beg,end)
    quicksort(L,beg,i)
    quicksort(L,j,end)

def partition(L,x,beg,end):
    i = beg
    j = end
    while i<j:
        if L[i]<x: i += 1
        elif L[j]>= x: j -= 1
        else: L[i],L[j]=L[j],L[i]
    while j<= end and L[j]<=x: j+=1
    return i-1,j

L = [random.randint(0,600) for n in range(300)]
quicksort(L)
L[:10]

[0, 0, 1, 2, 4, 7, 12, 14, 15, 15]

timer(lambda L:quicksort(L),genintlist(400));

compare_times(lambda L: quicksort(L),mergesort, genintlist(500))

Quick sort is not necessarily faster than mergesort but the main advantage is that it sorts in place

Sorting demo by Justin Johnson