# Minimal demo: fetch one page and print how long the request took.
import requests

r = requests.get("http://www.cnblogs.com/yoyoketang/")
print(r.elapsed.total_seconds())
2. Multiprocessing
import multiprocessing as mp import time, re import requests
# URLs to probe; the last entry is deliberately invalid so the failure
# path gets exercised.
List = [
    "www.baidu.com",
    "https://tongji.baidu.com/web/10000138058/overview/index?siteId=14350939",
    "https://karobben.github.io/",
    "https://space.bilibili.com/393056819",
    "https://github.com/Karobben",
    "https://www.yuque.com/dashboard/books",
    "haishdiashdiahsdiuhsaiudha",
]
def RespTime(url):
    """Fetch *url* and print its response time, or a failure notice.

    Catches requests' own exception hierarchy (bad scheme, DNS failure,
    connection refused, timeout) so one dead link does not abort the
    whole batch; the original bare ``except:`` also hid SystemExit and
    KeyboardInterrupt, which we no longer swallow.
    """
    try:
        r = requests.get(url, timeout=20)
        print("(" + url + ")(Update:" + time.strftime("%D")
              + " " + str(r.elapsed.total_seconds()) + "s)")
    except requests.RequestException:
        print(url + "\tFailed")
def multicore(List, Pool=10):
    """Probe every URL in *List* concurrently with a process pool.

    Parameters
    ----------
    List : iterable of str
        URLs handed to :func:`RespTime`.
    Pool : int, optional
        Number of worker processes (default 10).

    Returns
    -------
    list
        The ``AsyncResult`` handles, one per URL.  The original code
        rebound ``multi_res`` on every loop pass, discarding all but the
        last handle; collecting them in one list fixes that.
    """
    pool = mp.Pool(processes=Pool)
    multi_res = [pool.apply_async(RespTime, (i,)) for i in List]
    # Close the intake and wait for every worker to finish.
    pool.close()
    pool.join()
    return multi_res
3. Extract URLs from a Markdown File
1. Extract and Calculate Response Time
Use this script to test the links in a Markdown file
reference: 张土豆
import multiprocessing as mp import time, re import requests
# Read the whole Markdown document into memory.
Input = "/media/ken/Data/Github/Yuque/Bioinfor/test2.md"
# 'with' guarantees the handle is closed even if read() raises
# (the original left F open for the life of the process).
with open(Input, 'r') as F:
    File = F.read()
# Every "(http...)" / "(https...)" span in the document.  A URL body is
# any run of letters, digits, common URL punctuation, or a %-escaped
# byte; factoring it out keeps the compiled pattern identical.
url_body = r'(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
pattern = re.compile(r'\(http[s]?://' + url_body + r'\)')  # 匹配模式
List = pattern.findall(File)
def RespTime(url, return_dict):
    """Time one request and record the outcome in *return_dict*.

    *url* arrives still wrapped in parentheses from the regex match,
    e.g. "(https://example.com)"; the parens are stripped first.  The
    result line is stored as both key and value, which de-duplicates
    identical records across workers.
    """
    url = url.replace('(', '').replace(")", "")
    try:
        r = requests.get(url, timeout=20)
        Result = url + "\t" + str(r.elapsed.total_seconds())
    except requests.RequestException:
        # Narrowed from a bare except: only request failures mean "Failed".
        Result = url + "\tFailed"
    return_dict[Result] = Result
if __name__ == '__main__':
    # A Manager dict lets the worker processes report back to the parent.
    manager = mp.Manager()
    return_dict = manager.dict()
    # Build all workers first, then fan out and wait for the stragglers.
    jobs = [mp.Process(target=RespTime, args=(link, return_dict))
            for link in List]
    for job in jobs:
        job.start()
    for job in jobs:
        job.join()
import multiprocessing as mp import time, re import requests
# Read the whole Markdown document into memory.
Input = "/media/ken/Data/Github/Yuque/Bioinfor/test2.md"
# 'with' guarantees the handle is closed even if read() raises
# (the original left F open for the life of the process).
with open(Input, 'r') as F:
    File = F.read()
# Match "(url) (annotation)" pairs: a parenthesised URL followed by one
# space and a second parenthesised note (a previous timing record).  The
# shared alternation covers letters, digits, URL punctuation, and
# %-escaped bytes; the second group additionally admits spaces.
_ch = r'[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F])'
pattern = re.compile(
    r'\(http[s]?://(?:' + _ch + r')+\) \((?:' + _ch + r'| )+\)'
)  # 匹配模式
List = pattern.findall(File)
def RespTime(url, return_dict):
    """Re-time a previously annotated link and store a fresh record.

    *url* looks like "(https://example.com) (Update:...)"; only the
    leading URL (text before the first ')') is fetched.  The stored
    record is "(url) (Update:<date>; <seconds>s)" on success,
    "...OutOfTime)" when the request fails, and "...Failed)" if
    anything else goes wrong while building the record.
    """
    url = url.split(')')[0].replace("(", "")
    try:
        try:
            r = requests.get(url, timeout=20)
            rtime = str(r.elapsed.total_seconds()) + "s)"
        except requests.RequestException:
            # Timeouts, DNS errors, refused connections, bad schemes.
            rtime = "OutOfTime)"
        Result = "(" + url + ") (Update:" + time.strftime("%D") + "; " + rtime
    except Exception:
        # Best-effort catch-all kept from the original, but no longer a
        # bare except that would swallow SystemExit/KeyboardInterrupt.
        Result = "(" + url + ") (Update:" + time.strftime("%D") + "; Failed)"
    return_dict[Result] = Result
if __name__ == '__main__':
    # A Manager dict lets the worker processes report back to the parent.
    manager = mp.Manager()
    return_dict = manager.dict()
    # Build all workers first, then fan out and wait for the stragglers.
    jobs = [mp.Process(target=RespTime, args=(link, return_dict))
            for link in List]
    for job in jobs:
        job.start()
    for job in jobs:
        job.join()
# Flatten all worker results into one searchable text blob.
DB = "\n".join(return_dict.values())

for i in List:
    # Bare URL (parens stripped) identifies this link's fresh record.
    Str = i.split(")")[0].replace("(", '')
    # re.escape is required: URLs contain regex metacharacters
    # ('.', '?', '+', ...) that would otherwise corrupt the pattern;
    # the raw string also fixes the invalid "\)" escapes.
    pattern = re.compile(re.escape(Str) + r"\) \(.+")
    found = pattern.findall(DB)
    if not found:
        # No worker reported on this URL; leave the original text alone
        # instead of crashing on found[0].
        continue
    i2 = "(" + found[0]
    print(i, i2, sep='\n')
    # Splice the refreshed annotation over the old "(url) (note)" span.
    File = File[:File.find(i)] + i2 + File[File.find(i) + len(i):]
import multiprocessing as mp import time, re import requests
# Read the document named by INPUT (defined earlier in the script).
# 'with' closes the handle even if read() raises.
with open(INPUT, 'r') as F:
    File = F.read()
# Match "(url) (annotation)" pairs: a parenthesised URL followed by one
# space and a second parenthesised note (a previous timing record).  The
# shared alternation covers letters, digits, URL punctuation, and
# %-escaped bytes; the second group additionally admits spaces.
_ch = r'[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F])'
pattern = re.compile(
    r'\(http[s]?://(?:' + _ch + r')+\) \((?:' + _ch + r'| )+\)'
)  # 匹配模式
List = pattern.findall(File)
def RespTime(url, return_dict):
    """Re-time a previously annotated link and store a fresh record.

    *url* looks like "(https://example.com) (Update:...)"; only the
    leading URL (text before the first ')') is fetched.  The stored
    record is "(url) (Update:<date>; <seconds>s)" on success,
    "...OutOfTime)" when the request fails, and "...Failed)" if
    anything else goes wrong while building the record.
    """
    url = url.split(')')[0].replace("(", "")
    try:
        try:
            r = requests.get(url, timeout=20)
            rtime = str(r.elapsed.total_seconds()) + "s)"
        except requests.RequestException:
            # Timeouts, DNS errors, refused connections, bad schemes.
            rtime = "OutOfTime)"
        Result = "(" + url + ") (Update:" + time.strftime("%D") + "; " + rtime
    except Exception:
        # Best-effort catch-all kept from the original, but no longer a
        # bare except that would swallow SystemExit/KeyboardInterrupt.
        Result = "(" + url + ") (Update:" + time.strftime("%D") + "; Failed)"
    return_dict[Result] = Result
if __name__ == '__main__':
    # A Manager dict lets the worker processes report back to the parent.
    manager = mp.Manager()
    return_dict = manager.dict()
    # Build all workers first, then fan out and wait for the stragglers.
    jobs = [mp.Process(target=RespTime, args=(link, return_dict))
            for link in List]
    for job in jobs:
        job.start()
    for job in jobs:
        job.join()
# Flatten all worker results into one searchable text blob.
DB = "\n".join(return_dict.values())

for i in List:
    # Bare URL (parens stripped) identifies this link's fresh record.
    Str = i.split(")")[0].replace("(", '')
    # re.escape is required: URLs contain regex metacharacters
    # ('.', '?', '+', ...) that would otherwise corrupt the pattern;
    # the raw string also fixes the invalid "\)" escapes.
    pattern = re.compile(re.escape(Str) + r"\) \(.+")
    found = pattern.findall(DB)
    if not found:
        # No worker reported on this URL; leave the original text alone
        # instead of crashing on found[0].
        continue
    i2 = "(" + found[0]
    print(i, i2, sep='\n')
    # Splice the refreshed annotation over the old "(url) (note)" span.
    File = File[:File.find(i)] + i2 + File[File.find(i) + len(i):]
# Write the updated document back out; 'with' flushes and closes the
# handle even if write() raises (replaces explicit F.close()).
with open(OUTPUT, 'w') as F:
    F.write(File)
How to Use Python to Measure Websites' Response Times