view CSV
- view a CSV in human-readable columns when fields contain quoted commas (e.g. "1,2,3")
$ awk '!(NR%2){gsub(",",";")} 1' RS=\" ORS=\" <file.csv> | column -t -s , | less -S
-v flag
$ awk -F" " -v d1="$d1" -v d2="$d2" '$1==d1"-"d2"-2009" {print $1,$2,$3,$4,$5}'
col min max
- put min or max of column into a var
min=`awk 'BEGIN{a=1000}{if ($1<0+a) a=$1} END{print a}' <file>`
echo $min
max=`awk 'BEGIN{a= 0}{if ($1>0+a) a=$1} END{print a}' <file>`
echo $max
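- 1000 and 0 are just starting values for the comparison; choose them outside the expected data range (the min seed must be above the true minimum, the max seed below the true maximum)
- 0+a forces a to be treated as a number; otherwise the comparison may be done lexicographically (as strings)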
- can also do
$ cut -d " " -f1 <file> | sort -n | sed -n '1s/^/min=/p; $s/^/max=/p'
define output sep
$ awk 'BEGIN {FS="\t"; OFS=","} {$1=$1}1' <file>
rm dup in 2 col, both directions
$ awk '!a[$1$2]++ && !a[$2$1]++' <file>
col order
$ awk ' { t = $1; $1 = $2; $2 = t; print; } ' <file>
filter rows with col regex
$ awk '$1~/^chr([1-9]|[1-9][0-9]|[XY])$/' <file.bed>
- matches if column 1 is exactly "chr" followed by 1-9, 10-99, X, or Y (the regex is anchored with ^ and $)
- ~ tests for a regex match (== tests for exact equality)
another regex comparison
$ SNP_seed="rs7523690"
$ awk '/'$SNP_seed'/ {print}' <input_file> >> <output_file>
or
$ awk '{ if($11 ~ /'$SNP_seed'/) {print}}' <input_file> >> <output_file>
-F flag
$ awk -F: '{print $4}' <file>
filter rows with multiple cols
$ awk -F "\t" '{ if(($7 == 6) && ($8 >= 11000000 && $8 <= 25000000)) { print } }' <file>
$ awk '$1==22 && $2<23966388 && $3>23966388' <file>
sub multiple cols
- substitute within multiple columns
$ awk '{sub(/find1/,replace1,$col1);sub(/find2/,replace2,$col2);print $col1, $col2}' input.txt > output.txt
num cols
- get number of cols in a file
$ awk '{print NF}' <file> | sort -nu | tail -n1
EOF