sort by column
sort -t "," -k2,2 -n -r -u <file>
# -t field separator,
# -k2,2 column 2 only,
# -n numeric sort (otherwise the sort is lexicographic, so 10 comes before 2),
# -r reverse,
# -u keep only the first line for each duplicate value in column 2 (pair with -n when the column is numeric)
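a quick check, using a hypothetical scores.csv:
printf 'alice,2\nbob,10\ncarol,10\n' > scores.csv
sort -t "," -k2,2 -n -r -u scores.csv
# bob,10     <- -u keeps just one line for the duplicate key 10 (which one survives is not guaranteed)
# alice,2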
other sort options
# -k2.1,2.2 sort by characters 1 through 2 of field 2
# -k3n sort numerically from field 3 to the end of the line
# -R shuffle lines into a random order (identical lines end up grouped)
# -f ignore case
# -s "stabilizes sort", if ignore case, keeps original input order
sort + uniq
to count occurrences of each line
sort <file> | uniq -ic
# -i ignore case
# -c count
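for example (uniq only merges adjacent lines, so sort -f here keeps case variants next to each other):
printf 'dog\nDog\ncat\n' | sort -f | uniq -ic
#   1 cat
#   2 Dog    <- dog and Dog counted together because of -i (which spelling is shown can vary)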
to print duplicated lines ONCE
sort <file> | uniq -d
# -d print duplicated lines once
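for example:
printf 'a\nb\nb\nc\nc\nc\n' | sort | uniq -d
# b
# c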
to print ALL duplicated lines
sort <file> | uniq -iD
# -D print all duplicated lines
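same input with -D:
printf 'a\nb\nb\nc\nc\nc\n' | sort | uniq -D
# b
# b
# c
# c
# c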
to count occurrences of each duplicated line
sort <file> | uniq -D | uniq -ic
to print each duplicated line once with its count (single uniq call)
sort <file> | uniq -idc
# -d print duplicated lines once
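on the same hypothetical input, both pipelines above print the same counts:
printf 'a\nb\nb\nc\nc\nc\n' | sort | uniq -idc
#   2 b
#   3 c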
uniq comparing only the first N characters (-w N)
uniq -w 2 <file> # lines whose first 2 characters match are treated as duplicates
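for instance:
printf 'ab1\nab2\ncd3\n' | uniq -w 2
# ab1        <- ab2 dropped: its first 2 characters match ab1
# cd3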
to get unique lines based on a column
sort -nk3 <file> | awk -F"[. ]" '!a[$2]++'
# sorts by 3rd column numerically
# awk keeps the first line for each distinct value of field 2 (fields split on '.' or space by -F"[. ]")
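a sketch with hypothetical input, where field 2 (after splitting on '.' or space) is the file extension:
printf 'report.txt 2 10\ndraft.txt 1 5\nimage.png 3 7\n' > files.txt
sort -nk3 files.txt | awk -F"[. ]" '!a[$2]++'
# draft.txt 1 5
# image.png 3 7    <- report.txt dropped: its field 2 (txt) was already seen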