Ich versuche, die Fragen und Antworten von Google-Formularen auszulesen (zu scrapen), deren Links in einer CSV-Datei stehen. Leider scheint es, als würde ich die Fragebögen durchlaufen, ohne die Fragen und Antworten zu erhalten. Hier ist mein Code:
from collections import defaultdict
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import ElementClickInterceptedException
import time
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
from selenium.common.exceptions import ElementNotInteractableException, NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
import pickle
def setup_chromedriver():
    """Create a headless Chrome WebDriver from the local chromedriver binary.

    Returns:
        The ``webdriver.Chrome`` instance started with the ``--headless``
        argument.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    # The options object has to be handed to the driver; building it without
    # passing it on means "--headless" is never applied.
    # Raw string: "\P" and "\c" are invalid escape sequences in a plain
    # literal (same resulting path, but no DeprecationWarning/SyntaxWarning).
    driver = webdriver.Chrome(
        r"C:\Programs\chromedriver.exe",
        options=chrome_options,
    )
    return driver
def load_data(survey):
    """Load the recruitment CSV and name the columns that contain form links.

    Args:
        survey: Label of the survey source. It is accepted for the caller's
            convenience but not consulted; the same CSV file is read for
            every value.

    Returns:
        A ``(frame, link_columns)`` tuple: the DataFrame read from
        ``research_assistant_intern_recruitment.csv`` and the list of column
        names to scan for links (currently only ``"Link"``).
    """
    link_columns = ["Link"]
    frame = pd.read_csv("research_assistant_intern_recruitment.csv")
    return frame, link_columns
def extract(driver, df, column):
    """Scrape question/answer pairs from every Google Forms link in a column.

    Args:
        driver: Selenium WebDriver used to open each form.
        df: DataFrame that holds the links.
        column: Name of the column in ``df`` whose cells are visited.

    Returns:
        A list with one ``{index: questionnaire}`` dict per visited form.
        ``index`` counts the visited forms from 0; ``questionnaire`` maps the
        text of each question element to the list of ``data-initial-value``
        attributes read from that card's answer inputs.
    """
    result = []
    count = 0
    # Series.items() replaces Series.iteritems(), which pandas 2.0 removed.
    for _, cell in df[column].items():
        # str() lets NaN / non-string cells fall through instead of raising.
        if "docs.google.com" not in str(cell):
            continue

        driver.get(cell)
        questionaire = {}

        # Every card on the page bundles one question with its answers.
        containers = driver.find_elements_by_class_name(
            "freebirdFormeditorViewItemcardRoot.item-dlg-affectsIndex.item-dlg-dragTarget"
        )
        for container in containers:
            try:
                question = container.find_element_by_class_name(
                    "appsMaterialWizTextinputTextareaInput.exportTextarea"
                )
            except NoSuchElementException:
                # A card without a question field is skipped.
                print("NoSuchElementException: ")
                continue
            responses = container.find_elements_by_class_name(
                "quantumWizTextinputSimpleinputInput.exportInput"
            )
            questionaire[question.text] = [
                response.get_attribute("data-initial-value")
                for response in responses
            ]

        # Click the first matching button (if any) to move on, then set the
        # driver's page-load and script timeouts to 30 seconds.
        btns_next = driver.find_elements_by_class_name(
            'appsMaterialWizButtonPaperbuttonContent.exportButtonContent'
        )
        if btns_next:
            print("btns_next: ", btns_next)
            btns_next[0].click()
            driver.set_page_load_timeout(30)
            driver.set_script_timeout(30)

        result.append({count: questionaire})
        count += 1
    return result
if __name__ == '__main__':
    # Script entry point: scrape every configured source and pickle the
    # result of each one to "<source with underscores>_c.p".
    #
    # NOTE: certain questionnaires are only accessible when logged in to a
    # Google account.
    driver = setup_chromedriver()
    try:
        # Each source is processed from the CSV download of the Google Sheet.
        sources = ["Airline XYZ", "Hotel Chain ABC", "The Ministry of Tourism of France"]
        for s in sources:
            df, columns = load_data(s)
            for column in columns:
                result = extract(driver, df, column)
            file_stem = s.replace(" ", "_")
            # Context manager closes the file even if pickling fails.
            with open(file_stem + "_c.p", "wb") as pickle_out:
                pickle.dump(result, pickle_out)
    finally:
        # Always shut the browser down so no chromedriver process is leaked.
        driver.quit()
Ich bekomme die Fragen und Antworten aus dem ersten Fragebogen. Um zum nächsten Fragebogen zu gelangen, muss ich jedoch jedes Mal die Schaltfläche 'Weiter' drücken. Sobald ich dann zu einem neuen Fragebogen navigiere, öffnet sich jedes Mal eine neue Registerkarte mit der Startseite, und ich möchte nicht, dass diese neue Registerkarte geöffnet wird.