[SOLVED] How to write scraped data to multiple columns in Excel with pandas DataFrames?

Issue

This content is from Stack Overflow. Question asked by Cave Johnson123.

How do I scrape data into columns A, B, C and D? At the moment everything lands in a single column (Team and Team Odds stacked together) instead of separate Team, Team Odds, Lay Team and Lay Odds columns. How do I fix the code below so the scraped data is appended to multiple columns instead of one? Thanks
###########################################################################################################################################################

import time
import pandas as pd
import webdriver_manager.chrome
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

options = webdriver.ChromeOptions()
options.add_argument('--start-maximized')
options.add_experimental_option("detach", True)
service = Service('driver/chromedriver.exe')  # note: unused; the line below builds its own Service via webdriver_manager
driver = webdriver.Chrome(service=Service(webdriver_manager.chrome.ChromeDriverManager().install()), options=options)
driver.get('https://www.betfair.com.au/exchange/plus/en/football-betting-1/13')
WebDriverWait(driver, 100).until(lambda driver: driver.execute_script('return document.readyState') == 'complete')
element = WebDriverWait(driver,20).until(lambda driver: driver.find_element(By.CSS_SELECTOR, ".name:nth-child(1) , bf-coupon-table:nth-child(1) tr:nth-child(1) .matched-amount-value"))
print('h')

scroll = driver.find_element(By.CSS_SELECTOR, ".coupon-page-navigation__label--next")
driver.execute_script("arguments[0].scrollIntoView();", scroll)

try:
    driver.find_element(By.CSS_SELECTOR, "#cookie-holder span").click()
except:
    pass

i = 1
while i < 2:
    try:
        time.sleep(2)
        #
        #bf-coupon-page-navigation > ul > li:nth-child(4) > abf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']
        #driver.find_elements(By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > abf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")
        WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.CSS_SELECTOR,  ".name:nth-child(1)")))
        WebDriverWait(driver, 15).until(lambda driver: driver.execute_script('return document.readyState') == 'complete')
        element = WebDriverWait(driver, 15).until(lambda driver: driver.find_element(By.CSS_SELECTOR, ".name:nth-child(1) , bf-coupon-table:nth-child(1) tr:nth-child(1) .matched-amount-value"))
        scroll = driver.find_element(By.CSS_SELECTOR, ".coupon-page-navigation__label--next")
        driver.execute_script("arguments[0].scrollIntoView();", scroll)
        NextStory = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'bf-coupon-page-navigation > ul > li:nth-child(4) > a')))
        match = driver.find_element(By.CSS_SELECTOR, ".name:nth-child(1)")

        matches = driver.find_elements(By.CSS_SELECTOR, "CSS_SELECTOR")  # note: "CSS_SELECTOR" is a literal placeholder left in the post, so matches is empty

        Team1 = driver.find_elements(By.CSS_SELECTOR, ".name:nth-child(1)")
        for value in Team1:
            print(value.text)

        BackOdds = driver.find_elements(By.CSS_SELECTOR, ".coupon-runner:nth-child(1) .back-selection-button .bet-button-price")
        for value2 in BackOdds:
            print(value2.text)

        Team2 = driver.find_elements(By.CSS_SELECTOR, ".name:nth-child(2)")
        for value1 in Team2:
            print(value1.text)

        LayOdds = driver.find_elements(By.CSS_SELECTOR, ".coupon-runner:nth-child(1) .lay-selection-button .bet-button-price")
        for value3 in LayOdds:
            print(value3.text)
        #bf-coupon-page-navigation > ul > li:nth-child(4) > a
        driver.find_elements(By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")

        WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.CSS_SELECTOR,  "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")))

        NextStory.click()


        for match in matches:
            #print(match.text)
            driver.find_elements(By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")
            WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")))
            NextStory.click()
    except:
        i = 6
        print('error')
        pass

#PANDAS SCRAPING TO EXCEL
########################################################################################################################################################################
ls = []

for value in Team1:
  df1 = ls.append(value.text)

for value2 in BackOdds:
  df2 = ls.append(value2.text)

for value1 in Team2:
  df3 = ls.append(value1.text)

for value3 in LayOdds:
  df4 = ls.append(value3.text)

df1 = pd.DataFrame(data=ls)
df2 = pd.DataFrame(data=ls)
df3 = pd.DataFrame(data=ls)
df4 = pd.DataFrame(data=ls)
#print(df1)
#print(df2)
#print(df3)
#print(df4)

#df1.to_excel('output.xlsx', engine='openpyxl', sheet_name='Sheet_name_1', startcol=1)
#df2.to_excel('output.xlsx', engine='openpyxl', sheet_name='Sheet_name_1', startcol=2)
#df3.to_excel('output.xlsx', engine='openpyxl', sheet_name='Sheet_name_1', startcol=3)
#df4.to_excel('output.xlsx', engine='openpyxl', sheet_name='Sheet_name_1', startcol=4)

with pd.ExcelWriter('output.xlsx', engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    df1.to_excel(writer, engine='openpyxl', sheet_name='Sheet_name_1', startcol=2)
    df2.to_excel(writer, engine='openpyxl', sheet_name='Sheet_name_1', startcol=2)
    df3.to_excel(writer, engine='openpyxl', sheet_name='Sheet_name_1', startcol=3)
    df4.to_excel(writer, engine='openpyxl', sheet_name='Sheet_name_1', startcol=4)

print('Script Completed')
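Why this code ends up with a single column: list.append returns None, so assignments like df1 = ls.append(value.text) do nothing useful, and all four loops feed the same list ls. The four DataFrames built afterwards are therefore identical copies of one stacked column, and because the writer is opened with if_sheet_exists='replace', each to_excel call wipes out the previous one. A minimal sketch of the failure mode, with invented sample values:

import pandas as pd

ls = []
for v in ['Team A', 'Team B']:
    df1 = ls.append(v)        # list.append returns None, so df1 is None here
for v in ['2.5', '3.1']:
    df2 = ls.append(v)        # the same single list keeps growing

df1 = pd.DataFrame(data=ls)   # one column with teams AND odds stacked
print(df1)
#         0
# 0  Team A
# 1  Team B
# 2     2.5
# 3     3.1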



Solution

Well, I probably wouldn’t have done the code this way. There’s an API request that returns the data, so you don’t need to use Selenium, although Selenium will work; the underlying request returns the data in nice JSON format. But this will get you the data into lists. Then use pandas to construct the DataFrame.
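For the API route mentioned above, a minimal sketch with requests, assuming you locate the actual XHR endpoint in the browser's network tab (the URL below is a placeholder, not a documented endpoint):

import requests

API_URL = 'https://www.betfair.com.au/...'  # placeholder; substitute the real XHR URL from the network tab

resp = requests.get(API_URL, headers={'User-Agent': 'Mozilla/5.0'})
resp.raise_for_status()
data = resp.json()  # per the note above, the response is JSON; build the four lists from it here

Sticking with Selenium, here is the fix.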

Code:

...

try:
    driver.find_element(By.CSS_SELECTOR, "#cookie-holder span").click()
except:
    pass


team1List = []
backOddsList = []
team2List = []
layOddsList = []

i = 1
while i < 2:
    try:
        time.sleep(2)
        #
        #bf-coupon-page-navigation > ul > li:nth-child(4) > abf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']
        #driver.find_elements(By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > abf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")
        WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.CSS_SELECTOR,  ".name:nth-child(1)")))
        WebDriverWait(driver, 15).until(lambda driver: driver.execute_script('return document.readyState') == 'complete')
        element = WebDriverWait(driver, 15).until(lambda driver: driver.find_element(By.CSS_SELECTOR, ".name:nth-child(1) , bf-coupon-table:nth-child(1) tr:nth-child(1) .matched-amount-value"))
        scroll = driver.find_element(By.CSS_SELECTOR, ".coupon-page-navigation__label--next")
        driver.execute_script("arguments[0].scrollIntoView();", scroll)
        NextStory = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'bf-coupon-page-navigation > ul > li:nth-child(4) > a')))
        match = driver.find_element(By.CSS_SELECTOR, ".name:nth-child(1)")

        matches = driver.find_elements(By.CSS_SELECTOR, "CSS_SELECTOR")  # note: "CSS_SELECTOR" is a literal placeholder left in the post, so matches is empty

        Team1 = driver.find_elements(By.CSS_SELECTOR, ".name:nth-child(1)")
        for value in Team1:
            print(value.text)
            team1List.append(value.text)
            
        BackOdds = driver.find_elements(By.CSS_SELECTOR, ".coupon-runner:nth-child(1) .back-selection-button .bet-button-price")
        for value2 in BackOdds:
            print(value2.text)
            backOddsList.append(value2.text)

        Team2 = driver.find_elements(By.CSS_SELECTOR, ".name:nth-child(2)")
        for value1 in Team2:
            print(value1.text)
            team2List.append(value1.text)

        LayOdds = driver.find_elements(By.CSS_SELECTOR, ".coupon-runner:nth-child(1) .lay-selection-button .bet-button-price")
        for value3 in LayOdds:
            print(value3.text)
            layOddsList.append(value3.text)
        #bf-coupon-page-navigation > ul > li:nth-child(4) > a
        driver.find_elements(By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")

        WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.CSS_SELECTOR,  "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")))

        NextStory.click()


        for match in matches:
            #print(match.text)
            driver.find_elements(By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")
            WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.CSS_SELECTOR, "bf-coupon-page-navigation > ul > li:nth-child(4) > a[class = 'coupon-page-navigation__link coupon-page-navigation__link--next']")))
            NextStory.click()
    except:
        i = 6
        print('error')
        pass

#PANDAS SCRAPING TO EXCEL
########################################################################################################################################################################
df = pd.DataFrame(
    {
     'Team1':team1List,
     'Back Odds':backOddsList,
     'Team2': team2List,
     'Lay Odds': layOddsList})

df.to_excel('output.xlsx', engine='openpyxl', sheet_name='Sheet_name_1', index=False)
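Note that constructing the DataFrame from a dict of lists requires all four lists to be the same length; pandas raises a ValueError otherwise. And if you would rather keep four separate one-column frames and place each at an explicit column (A through D, as originally asked), one ExcelWriter opened once in write mode does it; a sketch, reusing the lists from above:

df1 = pd.DataFrame({'Team1': team1List})
df2 = pd.DataFrame({'Back Odds': backOddsList})
df3 = pd.DataFrame({'Team2': team2List})
df4 = pd.DataFrame({'Lay Odds': layOddsList})

with pd.ExcelWriter('output.xlsx', engine='openpyxl', mode='w') as writer:
    # one writer, one sheet: each frame lands in its own column instead of replacing the sheet
    df1.to_excel(writer, sheet_name='Sheet_name_1', startcol=0, index=False)
    df2.to_excel(writer, sheet_name='Sheet_name_1', startcol=1, index=False)
    df3.to_excel(writer, sheet_name='Sheet_name_1', startcol=2, index=False)
    df4.to_excel(writer, sheet_name='Sheet_name_1', startcol=3, index=False)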

Output:

print(df)
              Team1 Back Odds                  Team2 Lay Odds
0         FC Anyang       2.5        Daejeon Citizen     2.84
1   Seoul E-Land FC      2.82              Gyeongnam     3.25
2         MFk Snina      1.08              FC Kosice       25
3   Rimavska Sobota      1.01  DUKLA BANSKA BYSTRICA         
4        Nove Zamky      1.08                Samorin     12.5
..              ...       ...                    ...      ...
91          Denmark      1.47                Tunisia     1.54
92           Mexico      2.74                 Poland     3.25
93           France      1.22              Australia     1.28
94          Morocco       4.6                Croatia      5.6
95          Germany      1.36                  Japan     1.48

[96 rows x 4 columns]


This question was asked on Stack Overflow by Cave Johnson123 and answered by chitown88. It is licensed under the terms of CC BY-SA 2.5, CC BY-SA 3.0, and CC BY-SA 4.0.
