Python实现快速排序算法

#-*-encoding:utf-8-*-
import sys,random,time
# Python 2 idiom: reload(sys) re-exposes setdefaultencoding (hidden by
# site.py) so non-ASCII text can be handled without UnicodeDecodeError.
reload(sys)
sys.setdefaultencoding('utf8')
def singleSort(list,start,end):
    """Partition list[start:end+1] around list[start] (one quicksort pass).

    The pivot element is moved to its final sorted position in place.
    Returns the pivot's final index, or -1 when the range holds fewer
    than two elements.
    """
    if start>=end:
        return -1
    pivotPos = start
    pivot = list[pivotPos]
    low = start
    high = end
    while low<high:
        # Scan from the right for an element smaller than the pivot.
        while list[high]>=pivot and low<high:
            high = high-1
        if low>=high:
            return pivotPos
        # Pythonic tuple swap replaces the former exchange() helper call.
        list[pivotPos], list[high] = list[high], list[pivotPos]
        pivotPos=high
        # Scan from the left for an element larger than the pivot.
        while list[low]<=pivot and low<high:
            low = low+1
        if low>=high:
            return pivotPos
        list[pivotPos], list[low] = list[low], list[pivotPos]
        pivotPos=low
def exchange(list,pos1,pos2):
    """Swap list[pos1] and list[pos2] in place."""
    # Idiomatic tuple assignment replaces the temp-variable dance.
    list[pos1], list[pos2] = list[pos2], list[pos1]
def qsort(list,low,high):
    """In-place quicksort of list[low:high+1]."""
    p = singleSort(list,low,high)
    # A negative position means the range had fewer than two elements.
    if p >= 0:
        qsort(list, low, p - 1)
        qsort(list, p + 1, high)
def generateRandList(num):
    """Return a list of `num` random integers drawn from [0, 65535].

    Kept from the original: random.seed() reseeds from system entropy on
    every call (harmless but redundant).
    """
    random.seed()
    # Comprehension replaces the append loop; also avoids shadowing the
    # builtin `list` with a local variable.
    return [random.randint(0, 65535) for _ in range(num)]
if __name__ == '__main__':
     list = generateRandList(10000)
     #list=[4,8,2,3,7,9]
    qsort(list,0,len(list)-1)
    print list

Python多线程之条件变量Condition的使用

1.wait()方法会将使用该方法的线程挂起,并释放占用的Lock(由于这个Lock是Condition隐含的Lock,因此可以对实例化的Condition对象进行acquire()操作).
2.notify()会唤醒某个挂起的线程进入竞争Lock资源状态.
简单范例:

<pre>
#-*-encoding:utf-8-*-
import sys,time
import threading
# Python 2 idiom: restore setdefaultencoding (hidden by site.py) so
# UTF-8 text prints without UnicodeDecodeError.
reload(sys)
sys.setdefaultencoding('utf-8')

# Condition shared by both threads; it wraps an implicit reentrant lock,
# so acquire()/wait()/notify() may be called on `cond` directly.
cond = threading.Condition()
def p1():
    """Thread body: ping-pongs with p2 via the shared Condition.

    Each iteration: acquire the condition's lock, notify the peer, then
    wait() (which releases the lock while blocked and re-acquires it on
    wake-up), then sleep 3s.  NOTE(review): release() is never called;
    this only works because wait() fully releases the underlying
    reentrant lock every cycle.
    """
    global cond
    while True:
        cond.acquire()
        print "1 Got Lock"
        cond.notify()
        print "1 notify 2"
        cond.wait()
        print '1 is waked up'
        time.sleep(3)
   
def p2():
    """Thread body: mirror image of p1 — notifies p1, then waits.

    Same acquire/notify/wait/sleep cycle as p1; the two threads hand the
    condition's lock back and forth indefinitely.  NOTE(review): as in
    p1, release() is never called; wait() releasing the lock is what
    keeps the exchange going.
    """
    global cond
    while True:
        cond.acquire()
        print "2 Got Lock"
        cond.notify()
        print "2 notify 1"
        cond.wait()
        print '2 is waked up'
        time.sleep(3)
   
   
# Start both ping-pong threads; they alternate forever via notify/wait.
t1 = threading.Thread(target=p1)
t2 = threading.Thread(target=p2)
t1.start()
t2.start()
</pre>

Python多线程关于信号量的使用

SEM = threading.Semaphore(MaxThreadNumber)
SEM就是一个信号量对象,可以通过acquire()及release()函数来获取信号量及释放占有的信号量.
threading.BoundedSemaphore与Semaphore类似,区别在于当使用BoundedSemaphore信号量时,当release()时会检查是否超出信号量的范围.

Python:获取HTTP代理并验证有效性

#-*-encoding:utf-8-*-
"""
Author: Corpse
Last modified: 2014-10-15
Filename: proxy.py
Description: Process proxy
"""
import threading,socket,multiprocessing,thread,requests,urllib2,cookielib,urllib
import sys,time,re
# Harvested proxies keyed by listing type ('nn','nt','wn','wt') --
# presumably the xici.net category slugs; verify against the site.
proxyList = {'nn':[],'nt':[],'wn':[],'wt':[]}
# One mutex per proxy type guarding its proxyList entry.
LOCK = {'nn':thread.allocate_lock(),'nt':thread.allocate_lock(),'wn':thread.allocate_lock(),'wt':thread.allocate_lock()}
# The proxy count can be large; a bounded semaphore caps the number of
# concurrently running validator threads per type at 40.
SEM = {'nn':threading.BoundedSemaphore(40),'nt':threading.BoundedSemaphore(40),'wn':threading.BoundedSemaphore(40),'wt':threading.BoundedSemaphore(40)}
def operator(proxyType,pageCounter):#获取每种代理类型的指定页数的代理,验证后发送给服务器
    proxySource = 'http://www.xici.net.co/'+proxyType
    #开启多线程探测IP,每一个页面开启一个线程进行探测
    for pageNumber in xrange(pageCounter):
        url = proxySource+"/"+str(pageNumber+1)
        t = threading.Thread(target=proxyGetor,args=(url,proxyType,))
        t.start()
        t.join()
    global proxyList,SEM
    print "[+]"+proxySource+"前"+str(pageCounter)+'页存在',len(proxyList[proxyType]),"个HTTP代理."
    #至此已获取所有同一类型的代理IP及端口号
    #开放多进程验证代理的有效性
    for pry in proxyList[proxyType]:
        SEM[proxyType].acquire()
        thd = threading.Thread(target=validator,args=(pry,proxyType,10,))
        thd.start()
        thd.join()
    print "[+]经多线程代理有效性检测,"+proxySource+"前"+str(pageCounter)+'页存在',len(proxyList[proxyType]),"个有效HTTP代理."
    sendContent = ''
    for proxy in proxyList[proxyType]:
        sendContent+=proxy+'|'
    if len(sendContent)>0:
        print "[+]开始向服务器发送"+proxyType+"类型的数据"
        sendTarget = 'http://116.90.81.108:8081/sniffer.php' 
        serverResponse=proxySender(sendTarget,proxyType+'#'+sendContent)
        print "[+]向服务器发送"+proxyType+"类型数据的操作已经结束."
        print "[+]服务器返回消息:"+serverResponse
def proxyGetor(target,proxyType):#获取代理操作器
    response = requests.get(target)
    responseText = response.text
    responseText=responseText.replace('\r','').replace('\n','').replace(' ','')
    if responseText is not None:
        #正则匹配出代理
        rst=re.findall('\d+\.\d+\.\d+\.\d+\d+.*?.*?HTTP',responseText)
        global proxyList
        global LOCK
        for row in rst:
            ippt=row.split('><')
            ip=ippt[0][4:-4]
            port=ippt[1][3:-4]
            proxy = ip+':'+port
            LOCK[proxyType].acquire()
            proxyList[proxyType].append(proxy)
            LOCK[proxyType].release()
    else:
        print "[-]"+target+" Has No response." 
    #print '|',cnt,'|'
    #print proxyType+"==>",proxyList[proxyType]
    #sem.release()
def validator(proxyServer,proxyType,internalTime):#代理有效性验证
    global SEM,proxyList,LOCK
    testTarget="http://www.baidu.com"
    opener = urllib2.build_opener(urllib2.ProxyHandler({'http':proxyServer}))
    try:
        rsp = opener.open(testTarget,timeout=internalTime)
        #print '[',proxyType,']',proxyServer,"is OK."
    except Exception,e:#出现异常,判定为无效代理
        #print '[',proxyType,']',proxyServer,"Found Exception."
        LOCK[proxyType].acquire()#获取互斥锁,从列表中删除此代理
        index = proxyList[proxyType].index(proxyServer)#未加入异常处理
        proxyList[proxyType].pop(index)
        LOCK[proxyType].release()    
    finally:
        SEM[proxyType].release()
        #pass
def proxySender(target,data):
    """POST the harvested proxy payload to the collector and return the
    server's response body.

    NOTE(review): install_opener mutates urllib2's process-wide default
    opener as a side effect.
    """
    jar = cookielib.LWPCookieJar()
    handler = urllib2.HTTPCookieProcessor(jar)
    urllib2.install_opener(urllib2.build_opener(handler, urllib2.HTTPHandler))
    headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36', 
               'Referer' : 'http://www.d.com/'} 
    payload = urllib.urlencode({'F':'R',
                                'B':'NBXXOO59X4DZUQWBPZ1LL5PONT6XDSL',
                                'O':'1',
                                'BI':data,})
    req = urllib2.Request(target, payload, headers)
    return urllib2.urlopen(req).read()
def mainTrigger():# sweep trigger for all proxy types
    """Run operator() for every proxy type, one worker process each."""
    proxyTypeTuple = ('nn','nt','wn','wt')
    pageCounter = 2
    # Each proxy type is handled in its own process.
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())# pool sized to the CPU count
    # NOTE(review): apply_async results are never collected, so worker
    # exceptions are silently discarded.
    for x in xrange(len(proxyTypeTuple)):
        pool.apply_async(operator,(proxyTypeTuple[x],pageCounter,))
    pool.close()# stop accepting new tasks
    pool.join()# main process resumes once all children finish
def getServerOrder():
    """Poll the collector server for a work order; return the raw reply.

    NOTE(review): install_opener mutates urllib2's process-wide default
    opener as a side effect.
    """
    posturl = 'http://116.90.81.108:8081/sniffer.php' 
    jar = cookielib.LWPCookieJar()
    handler = urllib2.HTTPCookieProcessor(jar)
    urllib2.install_opener(urllib2.build_opener(handler, urllib2.HTTPHandler))
    headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36', 
               'Referer' : 'http://www.d.com/',
               'Connection':'Keep-Alive'
               } 
    payload = urllib.urlencode({'F':'T',
                                'B':'NBXXOO59X4DZUQWBPZ1LL5PONT6XDSL',
                                'O':'1',})
    req = urllib2.Request(posturl, payload, headers)
    return urllib2.urlopen(req).read()
if __name__=="__main__":    
    socket.setdefaulttimeout(10)    
    while(True):
        order = getServerOrder()
        if order!="":
            print "[+]"+time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))," Received New Order."
            mainTrigger()
        else:
            print "[-]"+time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))," No New Order."
        time.sleep(10)