awk


view CSV

  • view csv in human readable columns when field has quoted commas (e.g. “1,2,3”)
$ awk '!(NR%2){gsub(",",";")} 1' RS=\" ORS=\" <file.csv> | column -t -s , | less -S

-v flag

$ awk -F" " -v d1="$d1" -v d2="$d2" '$1==d1"-"d2"-2009" {print $1,$2,$3,$4,$5}'

col min max

  • put min or max of column into a var
min=`awk 'BEGIN{a=1000}{if ($1<0+a) a=$1} END{print a}' <file>`
echo $min
max=`awk 'BEGIN{a=   0}{if ($1>0+a) a=$1} END{print a}' <file>`
echo $max
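  • 1000 and 0 are just starting values for the comparison; choose a start above every value in the column for min (below every value for max), otherwise the starting value itself is printed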
  • 0+a forces a to be treated as a number; otherwise the comparison may be done lexicographically (as strings)
  • can also do
$ cut -d " " -f1 <file> | sort -n | sed -n '1s/^/min=/p; $s/^/max=/p'
  • but it’s slower to sort

define output sep

$ awk 'BEGIN {FS="\t"; OFS=","; print} {$1=$1}1' <file>

rm dup in 2 col, both directions

$ awk '!a[$1$2]++ && !a[$2$1]++' <file>

col order

$ awk ' { t = $1; $1 = $2; $2 = t; print; } ' <file>

filter rows with col regex

$ awk '$1~/^chr([1-9]|[1-9][0-9]|[XY])$/' <file.bed>
  • matches if column 1 is exactly “chr” followed by 1-9, 10-99, X, or Y (the ^ and $ anchors exclude anything else, e.g. chr1_random or chrM)
  • ~ is the regex match operator (== tests for exact equality instead)

another regex comparison

$ SNP_seed="rs7523690"
$ awk '/'$SNP_seed'/ {print}' <input_file> >> <output_file>
or
$ awk '{ if($11 ~ /'$SNP_seed'/) {print}}' <input_file> >> <output_file>

-F flag

$ awk -F: '{print $4}' <file>

filter rows with multiple cols

$ awk -F "\t" '{ if(($7 == 6) && ($8 >= 11000000 && $8 <= 25000000)) { print } }' <file>
  • or
$ awk '$1==22 && $2<23966388 && $3>23966388' <file>

sub multiple cols

  • substitute within multiple columns
$ awk '{sub(/find1/,replace1,$col1);sub(/find2/,replace2,$col2);print $col1, $col2}' input.txt > output.txt

num cols

  • get the number of columns in a file (the maximum field count over all rows)
$ awk '{print NF}' <file> | sort -nu | tail -n1

EOF

Previous
Next