本文介紹在 CentOS 8 上使用 PyAutoGUI 操作運行於 Xvfb 桌面環境中的 Firefox 所需的準備。由於所操作網頁需要登錄並有 OTP,所以還需要使用 VNC 來手動輸入密碼以做準備工作。
安裝 Xvfb
sudo yum install xorg-x11-server-Xvfb
#啓動 Xvfb
Xvfb :0 -screen 0 1366x768x24+32 -br +bs -ac &
#修改環境變量 DISPLAY
export DISPLAY=:0
#查看環境變量 DISPLAY
echo $DISPLAY
#查看 Xvfb 進程
ps -ef | grep Xvfb
如果只是爲了使用 webdriver 操作瀏覽器那麼這樣安裝運行 Xvfb 後就可以了,但是我們要使用 PyAutoGUI,所以還有依賴要裝。
安裝火狐瀏覽器
我個人比較偏好 ESR 版火狐,下載後解壓就可以使用了。
tar -xf firefox-115.5.0esr.tar.bz2
cd firefox
./firefox
#如果要指定窗口大小可以
./firefox -width 1350 -height 764
安裝 x11vnc 服務
其實只要下載執行檔,運行即可,非常簡單。不過 x11vnc 12 年未有更新,不知道還能用多久。如果哪天不能用了可以嘗試 Tiger VNC。
mv x11vnc-0.9.13_amd64-Linux x11vnc
chmod +x x11vnc
#運行 vnc server,密碼設置長一點,端口是 9999
./x11vnc -display :0 -ncache 0 -passwd piHrcHxmJauvIOftenUseA64digitPasswordpokiHJHQWdsgcGFTG -rfbport 9999
#上面命令在一次連接後會停止,如果要服務一直可以用,就加上 forever 參數
./x11vnc -display :0 -ncache 0 -passwd piHrcHxmJauvIOftenUseA64digitPasswordpokiHJHQWdsgcGFTG -rfbport 9999 -shared -forever
#注意服務器的網絡防火牆要打開 9999 端口
#用 nmap 查看端口是否打開
nmap -p 9999 1.1.1.1
Fedora 本地可以安裝 Remmina 作爲 VNC 客戶端。
sudo dnf install remmina -y
#如果想要使用 socks5 代理
proxychains4 /usr/bin/remmina
打開 Remmina 後在地址欄的協議選項裏選擇 VNC,然後地址填 1.1.1.1:9999 回車就可以連上服務器的桌面並看到剛剛打開的火狐瀏覽器進行設置了。點左上角的 + 號可以添加 profile,這樣下次雙擊就能連上 VNC 了。
安裝與配置 PyAutoGUI
我自己從源碼編譯的 Python 一直提示沒有 _tkinter 模塊。搜了半天沒有解決,於是使用系統自帶的 Python 3.9 得以解決。雖然我是用 Python 3.11 開發的,但是 Python 3.9 跑起來也沒問題。
import _tkinter # If this fails your Python may not be configured for Tk
ImportError: No module named _tkinter
#安裝 Python 3.9 以及依賴
sudo yum install libnsl ImageMagick xclip
sudo yum install python39 python39-tkinter
#xdotool 並不支持 Xvfb,所以不用裝
#sudo yum install xdotool
#創建虛擬環境
cd your_project
python3.9 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
示例代碼
"""Example: drive a Firefox window with PyAutoGUI and copy fields off a page.

The script pulls jobs one at a time, opens each job's profile URL in Firefox,
locates reference images on screen, copies the values next to them through
the clipboard, and posts the enriched job back.

NOTE(review): ``get_job``, ``post_job``, ``get_link_from_redirect`` and
``loaded_x_left`` are used below but are not defined in the visible part of
this example; they are assumed to be provided elsewhere in the real file.
"""
import json
import random
import sys
import requests
import time
import logging
import os
import pyperclip
import subprocess
from urllib.parse import urlparse, parse_qs, urlunparse
from dotenv import load_dotenv
import psutil
import pyautogui
import pyscreeze

script_dir = os.path.dirname(os.path.realpath(__file__))

# Configuration comes from the environment (optionally via a .env file).
load_dotenv()
api_key = os.getenv("API_KEY")
proxy = os.getenv("PROXY")
de = os.getenv("DE")  # compared against "Xvfb" / "Xfce" in activate_window()
# int(None) raises TypeError, so BATCH must be set.
batch = int(os.getenv("BATCH"))
if proxy is None:
    proxies = None
else:
    proxies = {"http": proxy, "https": proxy}

logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger()

# Also write log records to a file next to the script.
log_file = os.path.join(script_dir, "logfile.log")
file_handler = logging.FileHandler(log_file)
file_handler.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)


def activate_window(window_title):
    """Make sure the browser window is available for input.

    When ``DE`` is ``"Xvfb"``: start Firefox if no process whose name
    contains "firefox" is running, then wait 10 seconds.
    When ``DE`` is ``"Xfce"``: activate the visible window whose name matches
    ``window_title`` using xdotool.
    Any other ``DE`` value does nothing.
    """
    if de == "Xvfb":
        firefox_exists = False
        for proc in psutil.process_iter():
            try:
                if "firefox" in proc.name().lower():
                    firefox_exists = True
                    break
            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                # The process vanished or is not inspectable; skip it.
                pass
        if not firefox_exists:
            subprocess.Popen(
                [
                    "/home/centos/fred/programs/firefox/firefox",
                    "-width",
                    "1350",
                    "-height",
                    "764",
                ]
            )
            logger.info("firefox started")
            time.sleep(10)
    elif de == "Xfce":
        # Argument list instead of a shell string: a title containing quotes
        # can no longer break (or inject into) the command line.
        subprocess.run(
            [
                "xdotool",
                "search",
                "--onlyvisible",
                "--name",
                window_title,
                "windowactivate",
            ]
        )


def wait_for_image(image_path, timeout=10):
    """Poll the screen until ``image_path`` is found or ``timeout`` elapses.

    Returns the location reported by ``pyautogui.locateOnScreen``, or
    ``None`` if the image was not found in time.
    """
    start_time = time.time()
    while time.time() - start_time < timeout:
        try:
            location = pyautogui.locateOnScreen(
                image_path, region=(0, 0, 1360, 760), confidence=0.9, grayscale=True
            )
            if location:
                return location
        except pyautogui.ImageNotFoundException:
            pass
        time.sleep(1.5)
    return None


def wait_for_images(image_path, timeout=10):
    """Poll the screen until at least one match of ``image_path`` is found.

    Returns a list of all match locations, or ``[]`` if nothing was found
    before ``timeout`` seconds elapsed.
    """
    start_time = time.time()
    while time.time() - start_time < timeout:
        try:
            # locateAllOnScreen returns a lazy iterable; materialise it
            # before testing, otherwise the truthiness check always passes
            # and an empty result is returned without retrying.
            matches = list(
                pyautogui.locateAllOnScreen(
                    image_path, region=(0, 0, 1366, 760), confidence=0.9
                )
            )
            if matches:
                return matches
        except (pyautogui.ImageNotFoundException, pyscreeze.ImageNotFoundException):
            pass
        time.sleep(1.5)
    return []


def right_click():
    """Right-click at the current pointer position with a short hold."""
    pyautogui.mouseDown(button="right")
    time.sleep(0.1)  # hold time in seconds
    pyautogui.mouseUp(button="right")


def triple_click(x, y):
    """Click three times at ``(x, y)`` with 0.1 s between clicks."""
    for _ in range(3):
        pyautogui.click(x, y)
        time.sleep(0.1)


def get_data(obj):
    """Open the job's profile page and copy fields from it into ``obj``.

    Reads ``obj["contact_name_href"]`` and may set ``LinkedIn_Personal_URL``,
    ``LinkedIn_URL``, ``Supplemental_Email`` and ``Local_Location_Address``.

    Returns the same ``obj`` (mutated in place). The caller assigns the
    return value and subscripts it, so returning ``None`` would crash it.

    NOTE(review): the source's indentation was lost; the nesting below is
    reconstructed from syntax and intent and should be confirmed.
    """
    time.sleep(0.5)
    activate_window("Mozilla Firefox")
    time.sleep(0.5)

    # Load the page, dropping any query string from the URL.
    url_profile = obj["contact_name_href"]
    query_pos = url_profile.find("?")
    if query_pos != -1:
        url_profile = url_profile[:query_pos]
    logger.info(url_profile)

    # Focus the address bar, type the URL and navigate.
    pyautogui.hotkey("ctrl", "l")
    time.sleep(0.5)
    pyautogui.typewrite(url_profile)
    time.sleep(0.5)
    pyautogui.press("enter")
    time.sleep(8)

    # Check that the page has loaded by waiting for a reference image.
    image_path = os.path.join(script_dir, "img/ContactDetails.png")
    loaded = wait_for_image(image_path, 40)
    if loaded:
        logger.info("Page loaded")

    # LinkedIn: click every matching icon and read the opened tab's URL.
    image_path = os.path.join(script_dir, "img/LinkedIn.png")
    links = wait_for_images(image_path, 3)
    l_index = 0
    retry = 0
    previous = None
    while l_index < len(links):
        logger.info(f"l_index:{l_index+1}/{len(links)}")
        # Skip a match with the same first coordinate as the previous one.
        if previous is not None and previous[0] == links[l_index][0]:
            l_index = l_index + 1
            continue
        li = links[l_index]
        pyautogui.moveTo(li, duration=2, tween=pyautogui.easeInOutQuad)
        time.sleep(0.5)
        pyautogui.click(li)
        time.sleep(2)
        # Copy the address bar content, then close the tab.
        pyautogui.hotkey("ctrl", "l")
        time.sleep(0.5)
        pyautogui.hotkey("ctrl", "c")
        time.sleep(0.5)
        pyautogui.hotkey("ctrl", "w")
        time.sleep(0.5)
        url = pyperclip.paste().strip()
        # NOTE(review): get_link_from_redirect is defined outside this view.
        url = get_link_from_redirect(url)
        if url is not None and url != "":
            if url.find("linkedin") == -1:
                logger.info("not found 'linkedin'")
                # Retry the same match up to 3 times in total, then move on.
                if retry < 3:
                    retry = retry + 1
                else:
                    l_index = l_index + 1
                continue
            # NOTE(review): loaded_x_left is not defined in the visible
            # code. Matches left of it are stored as the personal URL.
            if li[0] < loaded_x_left:
                obj["LinkedIn_Personal_URL"] = url
            else:
                obj["LinkedIn_URL"] = url
        previous = li
        l_index = l_index + 1

    # Supplemental e-mail: right-click next to the label, press "l"
    # (presumably a context-menu shortcut that copies the address; confirm),
    # then read the clipboard.
    image_path = os.path.join(script_dir, "img/Supplemental.png")
    label = wait_for_image(image_path, 3)
    if label:
        pyautogui.moveTo(
            label[0] + label[2] + 50 + random.randint(-5, 5),
            label[1] + label[3] / 2,
            duration=1,
            tween=pyautogui.easeInOutQuad,
        )
        time.sleep(1)
        right_click()
        time.sleep(1)
        pyautogui.hotkey("l")
        time.sleep(1)
        pyautogui.hotkey("esc")
        obj["Supplemental_Email"] = pyperclip.paste().strip()

    # Local address: triple-click next to the label and copy the selection.
    image_path = os.path.join(script_dir, "img/Local.png")
    label = wait_for_image(image_path, 3)
    if label:
        x = label[0] + label[2] + 30 + random.randint(-5, 5)
        y = label[1] + label[3] / 2
        pyautogui.moveTo(x, y, duration=1, tween=pyautogui.easeInOutQuad)
        time.sleep(1)
        triple_click(x, y)
        time.sleep(1)
        pyautogui.hotkey("ctrl", "c")
        time.sleep(1)
        obj["Local_Location_Address"] = pyperclip.paste().strip()

    # NOTE(review): main() reads obj["message"] afterwards; nothing visible
    # here sets it, so it is assumed to be present on the job already.
    return obj


def main(max_id):
    """Process up to ``batch`` jobs, skipping those with ``id`` above ``max_id``.

    A job whose result is not "success" is remembered on its first failure
    and posted back as-is when it fails a second time.

    NOTE(review): get_job and post_job are defined outside this view.
    """
    failed_list = []
    for i in range(batch):
        logger.info(f"batch:{i+1}/{batch}")
        obj = get_job()
        if obj is None:
            continue
        # Nothing to open: post the job back unchanged.
        if obj["contact_name_href"] == "" or obj["contact_name_href"] is None:
            res = post_job(obj)
            logger.info(res)
            continue
        if obj["id"] > max_id:
            logger.info("max_id reached")
            continue
        obj = get_data(obj)
        if obj["message"] != "success":
            if obj["id"] in failed_list:
                res = post_job(obj)
                logger.info(res)
            else:
                failed_list.append(obj["id"])
            continue
        res = post_job(obj)
        logger.info(res)


if __name__ == "__main__":
    # Countdown before the script starts sending input.
    for i in range(6):
        logger.info(f"countdown:{6-i}")
        time.sleep(1)
    # Optional first command-line argument: the highest job id to process.
    if len(sys.argv) > 1:
        max_id = int(sys.argv[1])
        logger.info(f"max_id:{max_id}")
    else:
        max_id = 9999999
    main(max_id)