0 · Posted 2021-07-13 · Updated 2024-01-11 · a minute read (about 149 words)

Biology meetings information

Meetings

import re
from urllib.parse import urljoin
from urllib.request import urlopen

from bs4 import BeautifulSoup

# Monthly listing page whose entries are scraped.
source = "http://meeting.sciencenet.cn/index.php?s=/Category/mth_meeting&mth=2021-07"

# Chinese-style date: a year, optionally followed by a month, optionally
# followed by a day (e.g. "2021年", "2021年7月", "2021年07月20日").
# Written as a raw string so "\d" is a regex escape rather than an invalid
# Python string escape, and with "0?" (optional zero) instead of the
# character class "[0?]" so single-digit days and day 20 are matched too.
DATE_PATTERN = re.compile(
    r"\d{1,4}年"
    r"(?:(?:0?[1-9]|1[0-2])月"
    r"(?:(?:0?[1-9]|[12][0-9]|3[01])日)?)?"
)


def Fetch_Html(Url, Timeout=30):
    """Download *Url* and return its body decoded as UTF-8.

    The response is used as a context manager so the connection is closed
    even if reading or decoding fails. *Timeout* (seconds) keeps a stalled
    server from hanging the script forever.
    """
    with urlopen(Url, timeout=Timeout) as Response:
        return Response.read().decode('utf-8')


def Find_Date(Text):
    """Return the first date found in *Text*, or None when there is none."""
    Match = DATE_PATTERN.search(Text)
    return Match.group() if Match else None


def Get_Inf(Meeting):
    """Return ``(Title, Date)`` for one meeting entry of the listing page.

    *Meeting* is an element of the listing page (as returned by
    ``soup.find_all``). The title is read from the entry's
    ``<span class="aa">``; the date is the first date found in the text of
    the page the entry's first ``<a>`` links to.

    This is deliberately best-effort: any field that cannot be determined
    is returned as the string ``"404"`` instead of raising.
    """
    try:
        Title = Meeting.find("span", {"class": "aa"}).get_text()
    except AttributeError:
        # No matching <span> (find() returned None) or not an element at all.
        Title = "404"

    Date = "404"
    try:
        Tail = Meeting.find("a").get('href')
        if Tail:
            # Resolve the href against the listing URL; plain concatenation
            # would append it to the listing page's query string.
            Page = Fetch_Html(urljoin(source, Tail))
            Text = BeautifulSoup(Page, features='lxml').get_text()
            Date = Find_Date(Text) or "404"
    except Exception:
        # Network, decoding or parsing failure: keep going with a placeholder
        # so one bad entry does not abort the whole listing.
        Date = "404"

    return Title, Date


# NOTE(review): the original post ended with the exploratory expression
#     "会议时间：" in Text
# which cannot run at module level because ``Text`` only exists inside
# Get_Inf. It presumably checked whether a detail page carries the
# "会议时间：" label; anchoring the date search to that label may be a
# worthwhile follow-up -- TODO confirm against real pages.


if __name__ == "__main__":
    html = Fetch_Html(source)
    soup = BeautifulSoup(html, features='lxml')
    Meeting_list = soup.find_all("div", {"class": "col-md-6"})

    # Number of meeting entries on the listing page.
    Num_m = len(Meeting_list)

    for Meeting in Meeting_list:
        Tmp, Date = Get_Inf(Meeting)
        print(Date, Tmp)