Python 检测 URL 的 HTTP 状态码,并把返回 200 的 URL 追加保存到文件:
1.Requests
#! /usr/bin/env python
# coding=utf-8
"""Check every URL listed in 1.txt and append those answering HTTP 200 to 200.txt."""
import sys

import requests


def getHttpStatusCode(url, timeout=10):
    """Return the HTTP status code of a GET request to *url*.

    Args:
        url: Absolute URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host would stall the whole run.

    Returns:
        The integer status code of the response, or the
        ``requests.exceptions.HTTPError`` instance if one is raised.

    Raises:
        requests.exceptions.RequestException: Any other request failure
            (connection error, timeout, malformed URL) propagates to the
            caller.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.HTTPError as e:
        return e
    return response.status_code


if __name__ == "__main__":
    with open('1.txt', 'r') as url_file:
        for line in url_file:
            # Strip once and reuse the clean value everywhere, so neither the
            # output file nor the console gets a doubled newline.
            url = line.strip()
            if not url:
                # Blank lines are not URLs; skip them instead of requesting ''.
                continue
            try:
                status = getHttpStatusCode(url)
                if status == 200:
                    # A distinct name keeps the input handle from being shadowed.
                    with open('200.txt', 'a') as out_file:
                        out_file.write(url + '\n')
                    print(url)
                else:
                    print('no 200 code')
            except Exception as e:
                # Per-URL boundary: report the failure and go on to the next URL.
                print(e)
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""Check every URL listed in /1.txt and append those answering HTTP 200 to url_200.txt."""

import sys

import requests


def request_status(line, timeout=10):
    """Request *line* and record it in url_200.txt when the answer is HTTP 200.

    Args:
        line: URL to request, without surrounding whitespace.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host would stall the whole run.

    Returns:
        *line* when the response status is 200 (after appending it to
        url_200.txt), otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: If the request itself fails.
    """
    conn = requests.get(line, timeout=timeout)
    if conn.status_code != 200:
        return None
    with open('url_200.txt', 'a') as f:
        f.write(line + '\n')
    return line


if __name__ == '__main__':
    # NOTE(review): '/1.txt' points at the filesystem root - confirm this is
    # intended rather than a relative '1.txt'.
    # Text mode is required: in binary mode the lines are bytes on Python 3,
    # and stripping them with a str argument raises TypeError for every line.
    with open('/1.txt', 'r') as f:
        for line in f:
            url = line.strip()
            if not url:
                continue
            try:
                request_status(url)
            except Exception as e:
                # Still best-effort per URL, but say what failed instead of
                # silently discarding the error.
                sys.stderr.write('%s: %s\n' % (url, e))
2.Urllib
#! /usr/bin/env python
# coding: utf-8
"""Check every URL listed in 1.txt with urllib and append those answering HTTP 200 to 200urllib.txt."""
import contextlib
import linecache
import os
import sys
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

with open('1.txt') as url_file:
    for raw_line in url_file:
        # Remove the real line terminator so the URL is requested and stored
        # without trailing whitespace.
        url = raw_line.strip()
        if not url:
            continue
        try:
            # closing() releases the connection on both Python 2 and 3.
            with contextlib.closing(urlopen(url)) as response:
                status = response.getcode()
        except Exception as e:
            print(e)
            # Skip this URL: there is no status for it, and the previous
            # URL's status must not be reused.
            continue
        if status == 200:
            # Append mode ('a') keeps earlier results; 'r' is read-only and
            # 'w' would overwrite the file.
            with open('200urllib.txt', 'a') as f:
                f.write(url + '\n')
        else:
            print('error')