You can use the in operator to check if a string is contained in another string
# Substring membership with the `in` operator; the test is case-sensitive,
# which is why lowercase "tree" is not found in "TREEBRANCH".
string = "TREEBRANCH IS brown"
if "own" in string:
    print("own is in there")
# A conditional expression picks the message for the "tree" lookup.
print("tree is in there" if "tree" in string else "tree is not in there")
Check if a string starts or ends with another
# Classify file names by their prefix and by their extension.
files = ["image001.jpg", "IMG_981.JPG", "fav.gif", "news.txt"]
for filename in files:
    if filename.startswith("image"):
        print(filename, "starts with image")
    # Lower-case once so upper-case extensions such as ".JPG" are recognised;
    # endswith() accepts a tuple of suffixes, and the leading dot avoids
    # matching names that merely end in the letters "gif".
    if filename.lower().endswith((".gif", ".jpg")):
        print(filename, "is an image type")
Sometimes we want to match not a single exact string but a pattern. This is mostly used for text processing.
https://docs.python.org/3/library/re.html
Regular expressions (RE) are used to match a string. It is a test to see if a string matches a pattern.
import re
# Template: PATTERN and QUERYSTRING are placeholders for your own values.
# re.search() returns a match object on success and None otherwise, so the
# result can be tested directly in an if statement.
RESULT = re.search(PATTERN,QUERYSTRING)
if RESULT:
    # WE HAD A MATCH
    pass
else:
    # WE DID NOT HAVE A MATCH
    pass
import re
# "bow" occurs inside "elbow", so search() returns a (truthy) match object.
m = re.search("bow", "elbow")
print("matched bow" if m else "did not match bow")
Matches a pattern to a string. There are several components to the match.
\d matches numeric (0-9)
\D matches NOT numeric not(0-9)
\s matches white space
\S matches NOT white space
[A-Z] - ranges, all letters A-Z
. - matches any single character
re.search('\d bird', '8 birds') # true
# Raw strings (r'...') pass the backslash in \d through to the regex engine;
# a plain '\d' is an invalid string escape that newer Python versions warn about.
re.search(r'\d bird', '1 bird') # true
re.search(r'\d bird', 'A bird') # false
re.search(r'[123] bird', '1 bird')  # true
re.search(r'[0-3] bird', '4 birds') # false
# Matching is case-sensitive: "bird" does not match "Birds" ...
re.search(r'\d bird', '4 Birds') # false
# ... unless a character class allows both cases for the first letter.
re.search(r'\d [Bb]ird', '4 Birds') # true
Additionally the RE grammar allows repetitions
+ - match one or more times
* - match zero or more times
? - match 0 or 1 time
re.search('\d birds?','8 birds') # true
# Raw strings keep the backslash in \d intact for the regex engine.
# "s?" makes the trailing s optional.
re.search(r'\d birds?', '1 bird') # true
# "A+" needs at least one A before the B.
re.search(r'A+B', 'AAAAAAB') # true
re.search(r'A+B', 'AB') # true
re.search(r'A+B', 'B') # false
# "A*" also accepts zero A's, so a bare B matches.
re.search(r'A*B', 'AAAAAAB') # true
re.search(r'A*B', 'AB') # true
re.search(r'A*B', 'B') # true
Use Parentheses to group patterns and further repeat. Items in the parentheses that are captured can be retrieved and used.
import re
# Group 0 is the whole match; groups 1 and 2 are the outer and inner
# parentheses, numbered by the position of their opening bracket.
m = re.search("((AB)+)C", "ABABABCDED")
if m:
    whole, outer, inner = m.group(0, 1, 2)
    print("Group 0", whole)
    print("Group 1", outer)
    print("Group 2", inner)
# ^ anchors the pattern at the start of the string and $ at the end.
# Raw strings keep the backslash in \d intact for the regex engine.
re.search(r'\d bird', '8 birds') # true
re.search(r'\d bird$', '8 birds') # false
re.search(r'^\d bird', '8 birds') # true
re.search(r'^\d bird', '10 birds') # false
If you want to find more than one occurrence, or
count the number of occurrences, you can use the search or findall
options
# Walk through every non-overlapping match of `pattern` in `string`.
# The module-level re.search() takes flags as its third argument, not a start
# offset, so compile the pattern and use the compiled object's
# search(string, pos) form to resume from a given position.
regex = re.compile(pattern)
start = 0
m = regex.search(string, start)
while m:
    #  process this match
    # Resume right after this match; step one extra character only when the
    # match was empty, otherwise the loop would never advance.
    start = m.end() if m.end() > m.start() else m.end() + 1
    m = regex.search(string, start)
Python REs have an option called compile which will
(potentially) improve speed of pattern matching
# Compile once, then reuse the compiled pattern object for searching.
pattern = re.compile("AACA")
# search() returns the first match object, or None when "AACA" is not in DNA.
# The result must be bound to the same name that the if statement tests.
match = pattern.search(DNA)
if match:
    print(match.group(0))
Match parts of strings with more complicated construction
import re
# Run the same group report for two pattern/text pairs: first a single "AB"
# followed by C, then a run of C's followed by repeated "AB" units.
for regex_text, subject in (("((AB))C", "ABABABCDED"),
                            ("C+((AB)+)", "CCABABABCDED")):
    m = re.search(regex_text, subject)
    if m:
        print("Group 0", m.group(0))
        print("Group 1", m.group(1))
        print("Group 2", m.group(2))
Restriction Enzymes
# Recognition sites written as regular expressions; [AT] matches either base.
EcoRI   = "GAATTC"
EcoRII  = "CC[AT]GG"
RestrictionEnzymes = [EcoRI, EcoRII]
DNA = "ACAGACGAGAGAATTCGGTAGAT"
for RE in RestrictionEnzymes:
    pattern = re.compile(RE)
    # findall() returns every non-overlapping hit as a list of strings,
    # so its length is the number of sites.
    count = pattern.findall(DNA)
    print(RE, "matches", len(count), "sites")
    # Separator after each enzyme's report; indented to the loop body so the
    # block parses.
    print("//")
Replacement options
The re.sub() function allows replacement
re.sub(pattern, repl, string, count=0, flags=0)
To replace all instances of ‘cat’ with ‘dog’
#!/usr/bin/env python3
import re
message = "The cat curled up on the couch for a catnap"
# With the default count=0 every occurrence is replaced, including the "cat"
# inside "catnap".
newmsg = re.sub(r'cat', r'dog', message)
print(message)
print(newmsg)
# only replace first instance
# count is given by keyword; passing it positionally is deprecated in newer
# Python versions and is easy to confuse with flags.
newmsg = re.sub(r'cat', r'dog', message, count=1)
print(newmsg)
Now with a pattern
import itertools, sys, re, os
# Remote location of the chr08 FASTA file and the local name it is read from.
Chr8 = "http://sgd-archive.yeastgenome.org/sequence/S288C_reference/chromosomes/fasta/chr08.fsa"
Chr8File = "chr08.fsa"
# Pattern to look for and the text that replaces each hit.
PREsite = r'TGA[AT]AC'
REPLACE = 'PREPRE'
# Download only when no local copy exists yet; curl -O is expected to save
# the file under its remote name, which matches Chr8File.
have_local_copy = os.path.exists(Chr8File)
if not have_local_copy:
    os.system("curl -O {}".format(Chr8))
# define what a header looks like in FASTA format
def isheader(line):
    """Return True when *line* is a FASTA header, i.e. it starts with '>'.

    startswith() is used instead of indexing the first character so that an
    empty string yields False rather than raising IndexError.
    """
    return line.startswith('>')
def aspairs(f):
    """Yield (seq_id, sequence) pairs from an iterable of FASTA lines.

    Lines are grouped into alternating runs of header and non-header lines
    using isheader(). For a header run, the id is the first
    whitespace-delimited token after the leading character of the run's
    first line. For a non-header run, the stripped lines are joined into one
    string and yielded with the most recent id (an empty string if no header
    has been seen yet).
    """
    current_id = ''
    for is_header_run, lines in itertools.groupby(f, isheader):
        if is_header_run:
            # Only the first line of a header run supplies the id.
            first_header = next(lines)
            current_id = first_header[1:].split()[0]
            continue
        yield current_id, ''.join(map(str.strip, lines))
# Stream the FASTA records and print each sequence with every PREsite hit
# replaced by REPLACE.
with open(Chr8File, "rt") as fh:
    for _seq_id, sequence in aspairs(fh):
        # The sequence is lower-cased first, so IGNORECASE is what lets the
        # upper-case pattern still match.
        lowered = sequence.lower()
        print(re.sub(PREsite, REPLACE, lowered, flags=re.IGNORECASE))
#!/usr/bin/env python3
#Python code to demonstrate pattern matching
# import the regular expression library
import re
import random
random.seed(11012) # initialize the starting seed - we will all have basically same result this way
# a random DNA sequence generator
def rand_DNA(length):
    """Return a random DNA string of `length` bases drawn from A, C, G, T.

    Bases are picked with the module-level `random` generator, so seeding it
    beforehand makes the result reproducible.
    """
    alphabet = ('A', 'C', 'G', 'T')
    last_index = len(alphabet) - 1
    # One randint() call per base, in order, so a seeded run is repeatable.
    picks = [alphabet[random.randint(0, last_index)] for _ in range(length)]
    return "".join(picks)
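# lets initialize the patterns we want to match
# NOTE: this comment used to describe the PRE motif (a binding site for
# a transcription factor) and cited a paper whose reference is missing;
# the patterns defined below are restriction enzyme recognition sites
#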
# Restriction enzyme recognition sites written as regular expressions;
# a bracketed class such as [AT] matches any one of the listed bases.
EcoRI   = "GAATTC"
Bsu15I  = "ATCGAT"
Bsu36I  = "CCT[ACGT]AGG"
BsuRI   = "GGCC"
EcoRII  = "CC[AT]GG"
RestrictionEnzymes = [EcoRI, Bsu15I, Bsu36I,
                      BsuRI, EcoRII]
# Now let's search for these sites in a random DNA sequence
DNA = rand_DNA(100000)
for RE in RestrictionEnzymes:
    pattern = re.compile(RE)
    # findall() returns every non-overlapping hit, so its length is the
    # number of sites.
    count = pattern.findall(DNA)
    print(RE,"matches", len(count), "sites")
    # To list each hit with its coordinates, iterate over the matches:
    #     for match in pattern.finditer(DNA):
    #         print(match.group(0), match.start(), match.end())
    print( "//")