0 Daumen
200 Aufrufe

Ich versuche, die Fragen und Antworten aus Google-Formularen zu scrapen, deren Links in einer CSV-Datei stehen. Leider sieht es so aus, als würde ich die Fragebögen durchlaufen, ohne die Fragen und Antworten zu erhalten. Das ist mein Code:

    from collections import defaultdict
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.common.exceptions import ElementClickInterceptedException
    import time
    from selenium.webdriver.chrome.options import Options
    from selenium.common.exceptions import NoSuchElementException
    import pandas as pd
    from selenium.common.exceptions import ElementNotInteractableException, NoSuchElementException
    from webdriver_manager.chrome import ChromeDriverManager
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.support.ui import WebDriverWait
    import pickle
   
   
    def setup_chromedriver():
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        # driver = webdriver.Chrome(ChromeDriverManager().install())
        driver = webdriver.Chrome("C:\Programs\chromedriver.exe")
        return driver
   
   
    def load_data(survey):
        df = pd.read_csv("research_assistant_intern_recruitment.csv")
        filter_col = ["Link"]
        return df, filter_col
   
   
    def extract(driver, df, column):
        locations = df[column].iteritems()
        count = 0
        result = []
        for location in locations:
            questionaire = {}
            if "docs.google.com" in str(location[1]):
                location = location[1]
                driver.get(location)
   
                # I get all the card with questions and answers inside
                containers = driver.find_elements_by_class_name(
                    "freebirdFormeditorViewItemcardRoot.item-dlg-affectsIndex.item-dlg-dragTarget"
                    )
   
                # for each card
                for container in containers:
                    try:
                        # Get the question
                        question = container.find_element_by_class_name(
                            "appsMaterialWizTextinputTextareaInput.exportTextarea"
                            )
                    except NoSuchElementException:
                        print("NoSuchElementException: ")
                        continue
   
                    # Get the answers
                    responses = container.find_elements_by_class_name(
                        "quantumWizTextinputSimpleinputInput.exportInput"
                        )
                    extracted_responses = [response.get_attribute("data-initial-value") for response in responses]
   
                    questionaire[question.text] = extracted_responses
                # btns = driver.find_elements_by_class_name("appsMaterialWizButtonPaperbuttonContent.exportButtonContent")
                btns_next = driver.find_elements_by_class_name('appsMaterialWizButtonPaperbuttonContent.exportButtonContent')
                if btns_next:
                    print("btns_next: ", btns_next)
                    btns_next[0].click()
                    driver.set_page_load_timeout(30)
                    driver.set_script_timeout(30)
   
            result.append({count: questionaire})
   
   
   
            count += 1
        return result
   
    if __name__ == '__main__':
        """ Need to log on to the google account to access certain questionaires. Also Setup chromedriver to run in
        headless state """
        driver = setup_chromedriver()
        """ Load CSV download of Google Sheet """
        sources = ["Airline XYZ", "Hotel Chain ABC", "The Ministry of Tourism of France"]
        for s in sources:
            df, columns = load_data(s)
            for column in columns:
                result = extract(driver, df, column)
                s = s.replace(" ", "_")
                pickle_out = open(s + "_c.p", "wb")
                pickle.dump(result, pickle_out)
                pickle_out.close()


Ich bekomme die Fragen und Antworten aus dem ersten Fragebogen. Um zum nächsten Fragebogen zu gelangen, muss ich jedoch jedes Mal die Schaltfläche 'Weiter' drücken. Sobald dann zu einem neuen Fragebogen navigiert wird, öffnet sich jedes Mal eine neue Registerkarte mit der Startseite, und ich möchte nicht, dass diese neue Registerkarte geöffnet wird.


Avatar von

Ein anderes Problem?

Stell deine Frage

Willkommen bei der Stacklounge! Stell deine Frage einfach und kostenlos

x
Made by a lovely community