sort and uniq

sort by column

sort -t "," -k2,2 -n -r -u <file> 
# -t      field separator,
# -k2,2   column 2 only, 
# -n      numerical sort (otherwise the sort is lexicographical, so 10 comes before 2),
# -r      reverse, 
# -u      keep only the first line for each distinct value of column 2 after sorting (use together with -n if the column is numeric)

other sort options

# -k2.1,2.2  sort by field 2 position 1 to field 2 position 2
# -k3n       sort numerically (n) by the key that starts at field 3 and runs to the end of the line
# -R         scramble list order
# -f         ignore case
# -s         stable sort: lines whose keys compare equal (e.g. when ignoring case with -f) keep their original input order

sort + uniq

to count unique lines

sort <file> | uniq -ic
# -i   ignore case
# -c   count

to print duplicated lines ONCE

sort <file> | uniq -d
# -d   print duplicated lines once

to print ALL duplicated lines

sort <file> | uniq -iD
# -D   print all duplicated lines

to print number of duplicated items

sort <file> | uniq -D | uniq -ic

to print each duplicated line ONCE together with its number of occurrences (ignoring case)

sort <file> | uniq -idc
# -d   print duplicated lines once

uniq comparing only the first “w” characters of each line

uniq -w 2 <file>   # if the first 2 characters are the same, adjacent lines count as duplicates

to get unique lines based on a column

need AWK

sort -nk3 <file> | awk -F"[. ]" '!a[$2]++'
# sorts by 3rd column numerically
# awk splits fields on "." or space and keeps only the first line seen for each distinct 2nd field

EOF

Last updated on May 23, 2020

Edit this page