Create main.py

This commit is contained in:
Wang Zhiyu 2023-07-25 07:23:54 +08:00 committed by GitHub
commit ad58f820bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

81
main.py Normal file
View File

@ -0,0 +1,81 @@
import random
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
import json
URL = "https://www.luogu.com.cn/training/list"
options = Options()
options.add_argument("--headless") # 无头模式
options.set_preference("permissions.default.image", 2) # 无图模式
profile = FirefoxProfile()
profile.set_preference("permissions.default.frame", 3) # 禁用加载 iframe 的功能 (bilibili嵌套)
options.profile = profile
driver = webdriver.Firefox(options=options)
driver.get(URL)
print("[LOG] 加载索引")
TITLE_XPATH_TEMPLATE = '//*[@id="app"]/div[2]/main/div/div[2]/div/div[1]/div[2]/div[TDNUM]/span[2]/a'
TDID_XPATH_TEMPLATE = '//*[@id="app"]/div[2]/main/div/div[2]/div/div[1]/div[2]/div[TDNUM]/span[1]'
title_elements = list()
titles = list()
tdid_elements = list()
tdids = list()
for i in range(1, 41):
ele1 = driver.find_element(By.XPATH, TITLE_XPATH_TEMPLATE.replace("TDNUM", str(i)));
ele2 = driver.find_element(By.XPATH, TDID_XPATH_TEMPLATE.replace("TDNUM", str(i)));
title_elements.append(ele1)
tdid_elements.append(ele2)
for title_element in title_elements:
titles.append(title_element.text)
for tdid_element in tdid_elements:
tdids.append(tdid_element.text)
print("[LOG] 成功加载索引")
# print(titles)
# print(tdids)
TID_TEMPLATE = '//*[@id="app"]/div[2]/main/div/div[2]/div/div[1]/div[2]/div[TNUM]/span[2]'
cnt = 0
plancfg = list()
descriptions = list()
for tdid in tdids:
print("[LOG] 加载编号: " + tdid)
cnt += 1
tids = list()
driver.get("https://www.luogu.com.cn/training/" + tdid)
eleone = driver.find_element(By.XPATH, '//*[@id="app"]/div[2]/main/div/div[2]/section[2]/div/div[2]')
descriptions.append(eleone.text)
tab2 = driver.find_element(By.XPATH, '//*[@id="app"]/div[2]/main/div/div[1]/div/ul/li[2]/span')
tab2.click()
totalnum_ele = driver.find_element(By.XPATH, '//*[@id="app"]/div[2]/div[1]/div[2]/div[2]/div[1]/div/div[1]/span[2]')
for i in range(1, int(totalnum_ele.text)):
tidone = driver.find_element(By.XPATH, TID_TEMPLATE.replace("TNUM", str(i)))
tid = tidone.text
tids.append("LG" + tid) # 适配 XJYOJ
totalinf = dict()
totalinf = {"_id": cnt, "title": titles[cnt - 1], "requireNids": [], "pids": tids}
plancfg.append(totalinf)
markdown_description = ""
for i,j in zip(descriptions, titles):
markdown_description += "\n"
markdown_description += "## "
markdown_description += j
markdown_description += "\n"
markdown_description += i
jsoncfg = json.dumps(plancfg)
with open('cfg.json', 'w') as file1:
file1.write(jsoncfg)
with open('description.md', 'w') as file2:
file2.write(markdown_description)
with codecs.open('cfg.json', 'r', encoding='unicode_escape') as f:
content = f.read()
with codecs.open('cfg.json', 'w', encoding='utf-8') as f:
f.write(content)
with open('description.md', 'r') as f:
content = f.read()
modified_content = content.replace('\n', ' \n')
with open('description.md', 'w') as f:
f.write(modified_content)
driver.quit()