tr -s ' ' ' ' #squeeze runs of spaces to single space tr '()' '{}' # change parens to braces tr ab ba # swap a and b tr \'\" \"\' # swap single and double quotes tr -cs 0-9 '' #all digits only, onto one line********************************************************
-r reverse order (i.e. descending) -f fold lowercase into uppercase letters -u eliminate duplicate lines -d letters, digits, spaces only (ie. ignore punctuation chars) -n numerically else 10 < 9 -k N sort from the Nth field +i sort from the i+1th field -i sort to i+1th field +i.j sort on i+1th field starting at j+1th position of that field -tc change field separator to c -b ignore leading blanks (fields start at first leading blank) -o output file (can be input file)$ sort -r -n -t: +2 /etc/passwd
$ ps ax | grep billybob # only lines of ps that contain billybob billybob Tue 12:21 tty03 sh
hello, this is file 'stuff' it has only two lines
$ grep th f1 hello, this is file 'stuff' $ grep aardvark f1 $ #no lines match, thus no output
If the pattern has any spaces or punctuation, quote it:
$ grep 'is is' f1 # Use quotes to escape shell parsing/interpretation as two args is and is hello, this is file 'stuff' # (2nd is would be filename)
-n option print line number. useful if big file:
$ grep -n has f1 2: it has only two lines
-v option inverts sense, i.e. lines that don't match:
$ grep -v has f1 hello, this is file 'stuff' $ grep -v 'a' rime.txt #lines that don't contain an a $ grep -v ' ' rime.txt #lines that don't contain a space
-o option show only the match
$ grep billybob * # all lines in all files in current directory that contain billybob ... $ grep 'rand()' *.cpp #quote shell's () main.cpp: r1 = rand() % N; main.cpp: if (rand() % 2) simul.cpp: alpha[i] = rand() % limit; $ grep wills /etc/* 2>/dev/null #toss error messages
$ grep -l 'rand()' *.cpp main.cpp simul.cpp
-i option ignore case:
$ grep -i oak myfile # all 8 strings $ grep -i mariner rime.txt
-c gives count of matched lines (or pipe into wc -l):
$ grep -ci mariner rime.txt
Patterns: grep has its own set of metacharacters (operators). Regular expressions: a language for describing the patterns of strings. Different and more powerful than shell's filename generation patterns.
$ grep 'h.s' f1 hello, this is file 'stuff' it has only two lines
\ escape grep metacharacter: (grep's \, not the shell's)
$ grep '\.' f1 # quote to escape shell, \ to escape grep's "." grep's arg is \. $ # no lines in f1 that have "." $ grep \. f1 # arg to grep is . Actual match is first (ie. leftmost) on line. hello, this is file 'stuff' it has only two lines
$ grep 'h[ieo]s' f1 # his, hes, hos (no others) hello, this is file 'stuff' $ grep '[oO][aA][kK]' myfile #all 8 strings
Range of chars:
$ grep 'f[a-z]' f1 # fa, fb, fc,...fz hello, this is file 'stuff' $ grep '[0-9]' rime.txt #any digits? .[a-z] any char followed by a lowercase letter Reverse set or range: ^ as first character in [] [^aeiou] any char except lowercase vowel [^a-zA-Z] non-letter $ grep 'h[^ei]' f1 it has only two lines [A-Z][^A-Z] an uppercase letter followed by any char except an uppercase letter, e.g. D5, Dc, R , M$ 26 * 229 such 2 char combos [0-9][0-9][0-9][0-9][0-9] # "zip code"Anchors: ^ beginning of line, $ end of line
^The --match any line starting with The done$ --match any line ending with done \.$ --match any line ending with period .$ --match any char at end of line ^[^a-zA-Z] --line not starting with letter $ grep '^i' f1 # any line starting with i it has only two lines
Ex. list directories:
$ ls -l | grep '^d' # lines starting with d
Ex. list executable files:
$ ls -l | grep '^-..x' # starting with -, followed by any 2 chars, followed by x ^[0-9][0-9][0-9][0-9][0-9]$ # lines consisting of "zip code" (and nothing else)
Closure: * match zero or more occurrences of the preceding pattern. Multiplier.
ho*t #ht, hot, hoot, hooot,... [0-9][0-9]* #one or more digits .* #zero or more chars, i.e. anything/everything up to newline. grep '.*' == cat [a-zA-Z_][a-zA-Z0-9_]* #C/Java identifier <.*> #HTML tag -i '^[a-z][a-z]*$' lines consisting of letters only
N.B. Newline is never matched.
.*v --match everything up to last v. cf. sed editing: hellov hiv there [a-zA-Z]* --match any alphabetic string, including null string [a-zA-Z][a-zA-Z]* --match any nonempty alphabetic string ^$ --match line with zero chars, i.e. newline only. Empty line. ^ *$ --match empty line and line with spaces only ^ *$ --match empty line and line with tabs only ^[ ]*$ --match empty line and line with spaces and tabs only. bash: ^V to escape Tab ^[ ][ ]*$ --lines with blanks and tabs only, but not empty lines $ grep 'a.*e.*i.*o.*u' /usr/share/dict/words # all 5 vowels in order, ex. sacrilegious # one of each vowel, in order, ex. facetious $ grep '^[^aeiou]*a[^aeiou]*e[^aeiou]*i[^aeiou]*o[^aeiou]*u[^aeiou]*$' /usr/dict/words # all 5 vowels, ex. unidirectional. a pipeline of greps: AND $ grep a /usr/share/dict/words | grep e | grep i | grep o | grep u $ grep '................' /usr/share/dict/words #lines longer than 15 chars. (wc -L will tell length of longest) $ grep '^.$' #lines of one character # words of 6 chars or more in letter order, ex. almost $ grep '^a*b*c*d*e*f*g*h*i*j*k*l*m*n*o*p*q*r*s*t*u*v*w*x*y*z*$' /usr/dict/words | grep '......' pattern\{3,5\} #match 3 to 5 of pattern [0-9]\{5\} #match 5 digits, e.g. zip code $ grep '[a-zA-Z]\{10,\}' rime.txt #lines with words of 10 or more characters $ grep -c '^[ ]*$' myfile # number of empty and blank lines 20 Ex. All lines with word cat: $ grep cat myfile # but gives cat, cattle, scatter,... $ grep ' cat ' myfile # but misses cat., cat!, cat?, cat,, (cat,... $ grep '[ ({]cat[)}.,?!]' myfile # but is inclusive? $ grep '[^A-Za-z]cat[^A-Za-z]' myfile # any punctuation ok [but misses beginning and end of line]-w matches exact words
Ex. users without password, i.e. 2nd field of passwd empty # start of line, any number of non-colon chars, followed by 2 colons: $ grep '^[^:]*::' /etc/passwd $ grep -v '^[^:]*:x:' /etc/passwd #passwords of x
fgrep: no patterns, but many searches in parallel (OR). Can be done in grep with -F option.
$ fgrep 'garp > jones > billybob' /etc/passwd # all lines with garp or jones or billybob or combo $ fgrep -f names_file /etc/passwd # names_file has the words to search for
egrep: all that grep has plus full/extended regular expressions.
$ egrep 'garp|jones|billybob' /etc/passwd # any line with garp or jones or billybob Jack|Jill Jones # Jack or Jill Jones, not Jack Jones or Jill Jones
() for grouping:
(Jack|Jill) Jones # matches Jack Jones or Jill Jones compan(y|ies)
+ one or more occurrences of preceding pattern
[0-9]+ #one or more digits ^[ ]+$ #lines with blanks and/or tabs but not empty lines
? zero or one occurrence of preceding pattern
ho*t # ht, hot, hoot, hooot,... (ho)*t # t, hot, hohot, hohohot,... ho+t # hot, hoot, hooot,... ho?t # ht, hot 80[234]?86 #Intel processors
Practice:
editing_commands / script:
1.) line specifier (default is every line) to indicate which lines and/or
2.) editing instruction to operate on the matching lines
1a.) line number or range of numbers:
3 # third line in file 5,8 # lines 5 thru 8 [lines 5 and 8 by separate duplicated editing cmds] 10,$ # lines 10 thru end of file ($ is ed's last line specifier)
Unlike ed, no relative line numbering (e.g. $-9 ninth from last, +2 two lines forward)
/big/ # all lines containing string big /[Dd]o/ # all lines containing Do or do /^f[a-z]/ # all lines starting f followed by letter /[0-9][0-9][0-9][0-9][0-9]/ #all lines with "zip code" /^[0-9][0-9][0-9][0-9][0-9]$/ #all lines consisting of "zip code" /regex/I case-insensitive
(a & b combined):
1,/enough/ # from line 1 to first line with enough /word/,20 # from first line with word to line 20
! opposite of specified lines:
/word/,20! # from line 1 to line before first line with word and lines 21 thru end of file
2.) editing instructions:
$ sed '' myfile # cat $ sed 'p' myfile # each line duplicated. [use?] $ sed 'd' myfile # delete every line, i.e. no output $ sed '10 q' myfile # head $ sed '/aardvark/d' myfile # delete lines with aardvark (output all others): grep -v aardvark $ sed '/aardvark/!d' myfile # delete every line that doesn't contain aardvark: grep aardvark $ sed '/^$/d' myfile # delete empty lines: grep '.' $ sed '/^[ ]*$/d' myfile # delete blank and empty lines (space and Tab) $ sed '1,/Chapter 1/d' myfile # delete lines 1 thru first w/Chapter 1 $ sed '1,5d' myfile #delete first 5 lines. (output lines 6 to end: tail +6)
$ sed -n 24p myfile # extract line 24 $ sed -n 24,28p myfile # extract lines 24 thru 28 $ sed -n '1,10p' myfile # head # only output lines 1 thru 10. Without -n would dup 1-10, then rest of file output $ sed -n '/LINUX/p' myfile # only lines with LINUX output $ sed -n '/LINUX/Ip' myfile # only lines with linux in any case output $ sed -n '/pattern/ p' myfile # grep 'pattern' $ sed -n '$p' myfile # only last line printed. tail -1 $ sed -n '/^[ ]*$/!p' myfile # delete blank lines. Non-blank lines are output. $ sed -n '/./p' myfile # non-empty lines output $ sed -n '/[^ ]/p' myfile # lines containing at least one non-space character
$ sed 's/Bob/Robert/g' myfile #replace every Bob with Robert $ sed '1,6s/Bob/Robert/' myfile #first Bob on a line to Robert in lines 1 thru 6 $ sed 's/^/ /' myfile #add 3 spaces at beginning of each line. g would be useless $ sed -r 's/ +/ /g' myfile # compress multiple blanks (run of spaces) to single blank # need 1 or 2 spaces before the + tr -s ' ' ' ' $ sed -r 's/ +$//' myfile # delete trailing blanks Right-trim $ sed -r 's/^ +//' myfile # delete leading blanks Left-trim $ sed -r 's/ +/,/g' myfile # change runs of spaces to comma (field separator) $ sed 's/<[^>]*>//' myfile # delete HTML tags (assumes no more than one per line...)newstring can include & which means entire matched oldstring:
$ sed 's/UNIX/"UNIX"/g' myfile # replace all UNIX with "UNIX". Shell "" escaped because in '' $ sed 's/UNIX/"&"/g' myfile # same
& useful if oldstring is a pattern:
$ sed 's/.*/(&)/' myfile # parenthesize line $ sed 's/U[nN][iI][xX]/"&"/g' myfile # replace UNIX, UnIx ... with quoted UNIX, etc $ sed 's/[^ ][^ ]*/<&>/g' myfile # angle bracket each word $ sed -r 's/[^ ]+/<&>/g' myfile # angle bracket each word
Escape sed metacharacter with \ (sed's \)
$ sed 's/ and/ \&/g' myfile # replace and with &
tagging: parts of oldstring enclosed in \( \) and referred to by number in new string
$ sed 's/\(Jack\) and \(Jill\)/\2 and \1/g' myfile # replace Jack and Jill with Jill and Jack $ sed 's/\([jJ]ack\) and \([jJ]ill\)/\2 and \1/g' myfile # same, but jack/Jack and jill/Jill
Ex. try to change all seperate with separate:
$ sed 's/\([sS]ep\)erate/\1arate/g' myfile
Ex. file of Lastname, Firstname to Firstname Lastname
$ sed 's/\([^,]*\), \(.*\)/\2 \1/' myfile
w --write to a file
$ who | sed 's/ .*$//' # all chars from first space to end of line matched, replaced by null string. $ not necessary.
$ sed 's/^...//' myfile # cut -c4- # delete first 3 chars of each line. ^ not needed. <3 chars in line ignored $ sed 's/\(...\).*/\1/' myfile # delete all but first three chars: cut -c1-3 $ sed 's/...//g' myfile # chars of line deleted until 0,1,or 2 left (last ones in line) $ sed 's/.$//' myfile # delete last char of each line $ sed 's/[^A-Za-z0-9]//g' myfile # delete all non-alphanumerics (spaces included; newlines are kept): tr -dc 'a-zA-Z0-9\n'
$ sed 's/Dodger/Giant/g >s/Giant/Yankee/g' myfile # some Giant were originally Dodger
Ex. change billybob to garp, copy to file, delete lines with garp
$ sed 's/billybob/garp/gw garpfile > /garp/d' myfile
Ex. change billybob to garp, copy to file, delete original lines with garp
$ sed -n '/garp/!p # lines without garp printed > s/billybob/garp/gw garpfile' myfile
-f option: editing cmds/script from file (shell escape quotes not needed in file): remember complex, multiple use...
$ sed -f edit1 -f edit2 myfile # as if cmds were on cmd line
-e option: intermingling cmds from file and cmd line:
$ sed -e 's/New York/Boston/g' -f edit1 myfile
Ex. Double space a file (add newline at end of each line):
$ sed 's/$/\ #sed escape (\) the newline that would terminate editing instruction >/' myfile
N.B. Newline is never matched, thus can't delete them. (Use tr)
$ sed 's/ /\ >/g' myfile # each space replaced by newline. Might be some runs of blank lines if there were runs of spaces.
Ex. Replace each sequence of one or more spaces and tabs with newline:
$ sed -r 's/[ ]+/\ >/g' myfile
Ex. One char per line:
$ sed 's/./&\ >/g' myfile # each and every char change to itself and newline
Ex. Delete all non-letters, keep words separate:
$ sed 's/[^A-Za-z]/ /g' myfile # replace non-letters by space
Ex. Delete blank lines, strip punctuation, one word per line:
$ sed 's/[^A-Za-z]/ /g >s/ /\ >/g >/^ *$/d' myfile
s/[^a-zA-Z ]//g # delete non-letters except spaces s/ */\ # runs of spaces to newline /g /^ *$/d # delete blank lines******************************************
^ beginning of line $ end of line anchors . any one char [] any one enclosed char [^] any char not enclosed * zero or more occurrences of preceding + one or more occurrences of preceding ? zero or one occurrence of preceding () grouping | or /reg expr/
~ match (contains) !~ does not match $2~/Delaware/ --if 2nd field matches (contains) Delaware,do the action $2~/^Delaware$/ --if 2nd field is Delaware $2!~/Delaware/ --if 2nd field does not contain Delaware, do action $3~/P|M/ --if 3rd field has a P or M $3~/^P$|^M$/ --if 3rd field is P or M. Field anchors. $3~/^P|^M/ --if 3rd field starts with P or M
Relational expressions: < <= == != >= >
!($2~/./) $2!~/./ $2~/^$/ length($2)==0 built-in function
NR==20,NR==40 # lines 20 thru 40 NR==1,/[Ee]nd/ # lines 1 thru first with End or end /^100/,/^199/ # from line that starts with 100 thru line that starts with 199, e.g. sorted database key field
MD 100 2.00 NY 400 1.20 MA 345 2.05...
{total_sold += $2 sales = $2 * $3 total_sales += sales print $0, sales} END {print " ", total_sold, " ",total_sales}
Say this is in tot.awk:
{chars += length($0) + 1 # +1 for newline words += NF} END{print chars, words, NR, " average chars perline:",chars/NR}
Try: average chars/word, words/line
Arrays: elements created upon use, initialized to zero
Ex. sum two fields
{sum[1] += $3
sum[2] += $4}
END {print sum[1], sum[2]}
Ex. check for proper parenthesization. Report lines with errors. NB only one ( and ) per line!
# lefts as number of as yet unmatched left parens and as index of array of NR of lines with (
# \( escapes grouping metacharacter
/\(/ {A[++lefts] = NR} # left paren on this line /\)/ && lefts==0 {print "Mismatched ) on line " NR} /\)/ && lefts>0 {lefts--} END {if (lefts>0) { print lefts " unmatched ('s on lines: " for (i=1; i<=lefts; i++) print A[i] } }
{if (NF > nf) nf = NF # Max fields any record for (i=1; i<=NF; i++) # loop thru all fields of line sum[i] += $i} END {for (i=1; i<=nf; i++) print sum[i]}
{if (NF > nf) nf = NF # Max fields any record for (i=1; i<=NF; i++) { # loop thru all fields of line sum[i] += $i count[i]++ #count number of each field } } END {for (i=1; i<=nf; i++) print sum[i], " Average:",sum[i]/count[i]}
Ex. find longest line. file longest
{if (length > max) { max = length line = $0 }} END {print line}
$ ls -1 | awk -f longest
$ awk '{print length, $0}' myfile | sort -n | > awk '{for (i=length($1)+2; i<=length; i++) > printf "%s",substr($0,i,1) > printf "\n"}'
{for (i=1; i<=NF; i++) count[$i]++} # count array will have as many elements as different words in file END {for (word in count) # loop over each element in count print word, " ", count[word]}
{for (i=length($0); i>0; i--) printf "%s", substr($0,i,1) printf "\n"}$ awk '/'`echo * | sed "s/ /\|/g"`'/ {print}'
{lines[NR] = $0} # store each line in array of strings END {for (i=NR; i>0; i--) print lines[i]
awk'NF>0 {if ($1 == lastword) # if 1st word same as last word from prev line print $1, NR for (i=2; i<=NF; i++) # each other field if ($i == $(i-1)) # if ith field same as i-1th print $i, NR lastword = $NF} # lastword is last field' $*
function fact(n) { # no space before ( if (n <= 1) return 1 else return n * fact(n-1) }
getline # read next record getline x # read next record into x getline < "file2" # read a record from file2 getline x < "file2" # read a record from file2 into x getline x < /dev/tty # interactively read user input into x
Command line args: awk options not in argument list