程式師世界 >> 編程語言 >> 更多編程語言 >> Python >> Python多線程斷點續傳下載文件

Python多線程斷點續傳下載文件

編輯：Python

要實現斷點續傳或者分段下載，首先要明白 HTTP header 的 Range 字段。
簡單來說，就是把一個文件分成多段，每一段分別開一個線程去請求。
如果一個請求的文件大小是 1024 Bytes，劃分成 4 段，
各個請求的 header 分別帶上 Range: bytes=0-255、Range: bytes=256-511、Range: bytes=512-767、Range: bytes=768-1023（字節從 0 開始編號，起止位置都包含在內）

下面貼出Python的實現代碼

#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: download.py
# from www.biuman.com

import threading
import time
import urllib2
import re
import string
import sys
import os

class Mydown(threading.Thread):
    """Worker thread that downloads one byte range of a URL into a part file.

    The range is resumed: if ``filename`` already exists, its current size is
    taken as the number of bytes of this range that are already on disk, and
    the request starts right after them.

    Attributes:
        threadname: label used in the progress/error messages.
        url: address of the file to download.
        ranges: two-item sequence ``[first_byte, last_byte]`` (inclusive).
        filename: path of the part file that data is appended to.
        downloadsize: bytes of this range currently stored in ``filename``;
            updated while the thread runs so callers can poll progress.
        oneTimeSize: maximum number of bytes read from the response per call.
        startpoint: absolute offset the request starts at (set by ``run``).
    """

    def __init__(self, threadname, url, ranges, filename):
        super(Mydown, self).__init__()
        self.threadname = threadname
        self.url = url
        self.ranges = ranges
        self.filename = filename
        self.downloadsize = 0
        self.oneTimeSize = 16384  # 16 KiB per read

    def run(self):
        """Download the not-yet-fetched tail of the range into the part file.

        Returns:
            True when the range is complete or the response ended without an
            error, False when any exception occurred (the error is printed).
            Note that ``threading.Thread.start`` discards this value.
        """
        try:
            self.downloadsize = os.path.getsize(self.filename)
        except OSError:
            # No part file yet: nothing has been downloaded for this range.
            self.downloadsize = 0

        last = self.ranges[1]
        self.startpoint = self.ranges[0] + self.downloadsize
        print('thread_%s downloading from %d to %d'
              % (self.threadname, self.startpoint, last))

        remaining = last - self.startpoint + 1
        if remaining <= 0:
            # The part is already complete.  Sending "bytes=start-end" with
            # start > end is an invalid range that servers may ignore and
            # answer with the whole file, which would corrupt the part file.
            return True

        try:
            request = urllib2.Request(self.url)
            request.add_header(
                "Range", "bytes=%d-%d" % (self.startpoint, last))
            response = urllib2.urlopen(request)
            try:
                # A non-206 answer means the body starts at byte 0 of the
                # file; that is only usable when byte 0 is what we asked for.
                if self.startpoint > 0 and response.getcode() != 206:
                    raise IOError(
                        'server ignored the Range header (HTTP %s)'
                        % response.getcode())
                with open(self.filename, 'ab') as handle:
                    # Never store more than the requested range, even if the
                    # server sends extra bytes.
                    while remaining > 0:
                        data = response.read(min(self.oneTimeSize, remaining))
                        if not data:
                            break
                        handle.write(data)
                        self.downloadsize += len(data)
                        remaining -= len(data)
            finally:
                response.close()
        except Exception as e:
            # Best effort: report the failure and let the thread end; the
            # bytes already written stay on disk for a later resume.
            print('download error: %s' % (e,))
            return False
        return True

def getUrlFileSize(url):
    """Return the size in bytes that the server reports for ``url``.

    Opens ``url`` with ``urllib2.urlopen`` and reads the ``Content-Length``
    header of the response; the response is closed before returning.

    Args:
        url: address of the file to inspect.

    Returns:
        The ``Content-Length`` value as an int, or None when the response
        carries no such header.

    Raises:
        ValueError: if the header value is not a valid integer.
        Any exception raised by ``urllib2.urlopen`` is propagated.
    """
    res = urllib2.urlopen(url)
    try:
        # Look the header up by name instead of substring-matching every raw
        # header line for 'Length', which is case-sensitive and can match
        # unrelated headers.
        length = res.info().get('Content-Length')
    finally:
        res.close()
    if length is None:
        return None
    return int(length.strip())

def splitBlock(totalsize, blocks):
    """Split ``totalsize`` bytes into ``blocks`` inclusive byte ranges.

    Every range is ``[first_byte, last_byte]`` with both ends included and
    bytes numbered from 0, which is the form an HTTP ``Range: bytes=a-b``
    header expects.  All ranges have ``totalsize // blocks`` bytes except the
    last one, which also takes the remainder.

    Args:
        totalsize: total number of bytes to cover.
        blocks: number of ranges to produce; must be at least 1.

    Returns:
        A list of ``blocks`` two-item lists.  When ``totalsize`` is smaller
        than ``blocks`` the leading ranges are empty (``last < first``).

    Raises:
        ValueError: if ``blocks`` is less than 1.
    """
    if blocks < 1:
        raise ValueError('blocks must be at least 1, got %r' % (blocks,))

    # Floor division keeps the offsets integral on Python 3 as well.
    blocksize = totalsize // blocks
    ranges = [[x * blocksize, (x + 1) * blocksize - 1]
              for x in range(blocks - 1)]
    # The last block absorbs the remainder; the final byte is totalsize - 1
    # because offsets are 0-based and the end is inclusive.
    ranges.append([blocksize * (blocks - 1), totalsize - 1])
    return ranges
def islive(tasks):
    """Return True if at least one thread in ``tasks`` is still running.

    Args:
        tasks: iterable of ``threading.Thread`` objects (may be empty).

    Returns:
        True when any thread reports ``is_alive()``, otherwise False.
    """
    # is_alive() replaces the camelCase isAlive(), which newer Python
    # versions no longer provide.
    return any(task.is_alive() for task in tasks)

def startDown(url, output, blocks):
    """Download ``url`` into ``output`` using ``blocks`` parallel threads.

    The file size is queried with ``getUrlFileSize``, split into byte ranges
    with ``splitBlock`` and each range is fetched by one ``Mydown`` thread
    into a part file named ``tmpfile_<index>`` in the current directory.
    Progress is written to stdout while the threads run.  Once every thread
    has finished and all bytes are present, the parts are concatenated into
    ``output`` and removed.

    Args:
        url: address of the file to download.
        output: path of the merged result file.
        blocks: number of ranges / threads to use.

    Raises:
        ValueError: if the server does not report the file size.
    """
    import shutil  # only needed here, for streaming the parts together

    size = getUrlFileSize(url)
    if size is None:
        raise ValueError(
            'server did not report a Content-Length for %s' % (url,))

    ranges = splitBlock(size, blocks)
    # NOTE(review): the part names do not depend on url/output, so parts left
    # over from a different download would be treated as resume data.
    filename = ["tmpfile_%d" % i for i in range(blocks)]
    tasks = []
    for x in range(blocks):
        t = Mydown(x, url, ranges[x], filename[x])
        t.daemon = True
        t.start()
        tasks.append(t)

    def report():
        """Write one progress line and return the bytes downloaded so far."""
        downloaded = sum(task.downloadsize for task in tasks)
        # Guard against a zero-byte file to avoid dividing by zero.
        process = downloaded / float(size) * 100 if size else 100.0
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (
            size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        return downloaded

    time.sleep(2)
    while islive(tasks):
        report()
        time.sleep(0.5)

    # Show the final state and end the carriage-return progress line.
    downloaded = report()
    sys.stdout.write(u'\n')

    if downloaded != size:
        # At least one thread failed or stored unexpected data.  Merging now
        # would produce a corrupt file and deleting the parts would throw
        # away the resume state, so stop here and keep them.
        sys.stderr.write(
            'download incomplete: have %d of %d bytes; part files kept\n'
            % (downloaded, size))
        return

    with open(output, 'wb') as filehandle:
        for part in filename:
            if not os.path.exists(part):
                # An empty range never creates its part file.
                continue
            with open(part, 'rb') as f:
                # Stream in chunks instead of loading a whole part in memory.
                shutil.copyfileobj(f, filehandle)

    # Remove the parts only after the merged file was written and closed.
    for part in filename:
        try:
            os.remove(part)
        except OSError:
            # Best effort: a leftover part file is harmless.
            pass

if __name__ == '__main__':
    # Demo run: fetch a sample installer with five parallel range requests
    # and store it under the same file name in the current directory.
    output = 'SafariSetup.exe'
    url = 'http://ftp-idc.pconline.com.cn/6f58e0a45ffe9a32a73ff2cc4adfeb37/pub/download/201010/SafariSetup.exe'
    startDown(url, output, blocks=5)