"""Biology meetings information.

Scrape one month of the ScienceNet meeting system (科学网会议系统) listing and
print the date and title of every meeting found on it.

Original post: https://karobben.github.io/2021/07/13/Blog/Biomeeting/
Author: Karobben. Posted 2021-07-13, updated 2024-01-11.
"""

import re
from http.client import HTTPException
from urllib.parse import urljoin
from urllib.request import urlopen

from bs4 import BeautifulSoup

# Month listing to scrape; links to detail pages are resolved against it.
source = "http://meeting.sciencenet.cn/index.php?s=/Category/mth_meeting&mth=2021-07"

# Placeholder returned for any field that could not be extracted.
_MISSING = "404"

# Seconds to wait on the server before giving up on a request.
_TIMEOUT = 30

# Chinese-style date: a year, optionally followed by a month, optionally
# followed by a day (e.g. "2021年", "2021年7月", "2021年07月15日").
# The original pattern wrote the optional leading digit as "[0?]", which is a
# character class matching a literal "?", so days such as "7日", "20日" and
# "30日" were never matched; "0?" expresses the intended optional zero.
_DATE_RE = re.compile(
    r"\d{1,4}年"
    r"(?:(?:0?[1-9]|1[0-2])月"
    r"(?:(?:0?[1-9]|[12][0-9]|3[01])日)?)?"
)


def _fetch_soup(url):
    """Download *url*, decode it as UTF-8 and parse it with the lxml parser."""
    # The context manager closes the HTTP response even if decoding fails.
    with urlopen(url, timeout=_TIMEOUT) as response:
        html = response.read().decode("utf-8")
    return BeautifulSoup(html, features="lxml")


def Get_Inf(Meeting):
    """Return ``(title, date)`` for one meeting entry of the listing page.

    Args:
        Meeting: a parsed element of the listing page (one of the
            ``div.col-md-6`` tags found by ``main``).

    Returns:
        A ``(title, date)`` tuple of strings. The title is the text of the
        entry's ``span.aa``; the date is the first date-like string found in
        the text of the page the entry's first link points to. Either value
        is ``"404"`` when it cannot be determined (missing tag or link,
        download/decoding failure, or no date in the page text).
    """
    title_tag = Meeting.find("span", {"class": "aa"})
    Title = title_tag.get_text() if title_tag is not None else _MISSING

    link = Meeting.find("a")
    Tail = link.get("href") if link is not None else None
    if not Tail:
        # No usable link: there is no detail page to read the date from.
        return Title, _MISSING

    try:
        # urljoin handles absolute and relative hrefs alike; plain string
        # concatenation onto `source` would append to its query string.
        Text = _fetch_soup(urljoin(source, Tail)).get_text()
    except (OSError, ValueError, HTTPException):
        # Best effort: a dead link or undecodable page must not abort the
        # whole listing, so report the date as missing instead.
        return Title, _MISSING

    # NOTE(review): the original post ended with the exploratory check
    # `"会议时间:" in Text`; presumably the detail page labels the date with
    # that prefix -- confirm before anchoring the search on it.
    res = _DATE_RE.search(Text)
    return Title, (res.group() if res else _MISSING)


def main():
    """Print ``date title`` for every meeting on the listing page."""
    listing = _fetch_soup(source)
    for Meeting in listing.find_all("div", {"class": "col-md-6"}):
        Tmp, Date = Get_Inf(Meeting)
        print(Date, Tmp)


if __name__ == "__main__":
    main()