Added Pandas code 4-29-2025
This commit is contained in:
		
							parent
							
								
									883afde41f
								
							
						
					
					
						commit
						83c99e9695
					
				
					 22 changed files with 410 additions and 0 deletions
				
			
		
							
								
								
									
										9
									
								
								bigos/lecture20250429/1CreateBasicSeries/hack.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								bigos/lecture20250429/1CreateBasicSeries/hack.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,9 @@
 | 
			
		|||
# hack.py -- element-wise arithmetic on two index-aligned pandas Series.
import pandas as pd

# Two series sharing the same label index, so +, * align label-by-label.
s1 = pd.Series([1.25,1.75,2.25,2.75,3.25],index=['a','b','c','d','e'])
print(s1)
s2 = pd.Series([6,11.2,4,2,1.1],index=['a','b','c','d','e'])
# BUG FIX: the original printed s1 a second time; show s2 as intended.
print(s2)
# Element-wise sum and product over the shared index.
s3 = s1+s2
print(s3)
s4 = s1*s2
print(s4)
 | 
			
		||||
							
								
								
									
										3
									
								
								bigos/lecture20250429/1CreateBasicSeries/series01.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								bigos/lecture20250429/1CreateBasicSeries/series01.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,3 @@
 | 
			
		|||
# Create a basic Series; with no index argument pandas assigns the
# default integer index 0..n-1.
import pandas as pd

values = [1.25, 1.75, 2.25, 2.75, 3.25]
s1 = pd.Series(values)
print(s1)
 | 
			
		||||
							
								
								
									
										3
									
								
								bigos/lecture20250429/1CreateBasicSeries/series02.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								bigos/lecture20250429/1CreateBasicSeries/series02.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,3 @@
 | 
			
		|||
# Create a Series with an explicit string index instead of the default
# integer positions.
import pandas as pd

data = [1.25, 1.75, 2.25, 2.75, 3.25]
labels = ['a', 'b', 'c', 'd', 'e']
s1 = pd.Series(data, index=labels)
print(s1)
 | 
			
		||||
							
								
								
									
										5
									
								
								bigos/lecture20250429/1CreateBasicSeries/series03.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								bigos/lecture20250429/1CreateBasicSeries/series03.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,5 @@
 | 
			
		|||
# Scalar broadcasting: adding a number to a Series adds it to every
# element, keeping the index.
import pandas as pd

data = [1.25, 1.75, 2.25, 2.75, 3.25]
labels = ['a', 'b', 'c', 'd', 'e']
s1 = pd.Series(data, index=labels)
print(s1)

s3 = s1 + 5
print(s3)
 | 
			
		||||
							
								
								
									
										5
									
								
								bigos/lecture20250429/1CreateBasicSeries/series04.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								bigos/lecture20250429/1CreateBasicSeries/series04.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,5 @@
 | 
			
		|||
import pandas as pd
 | 
			
		||||
s1 = pd.Series([1.25,1.75,2.25,2.75,3.25])
 | 
			
		||||
print(s1)
 | 
			
		||||
s2 = s1[s1>2]
 | 
			
		||||
print(s2)
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,9 @@
 | 
			
		|||
# Build a Series from a NumPy array; applying a NumPy ufunc such as
# sqrt to a Series returns another Series, not a bare ndarray.
import pandas as pd
import numpy as np

arr = np.arange(10, 51, 10)  # same values as np.array([10,20,30,40,50])
s = pd.Series(arr)
print(arr)
print(s)

t = np.sqrt(s)
print(t)
print(type(t))
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,5 @@
 | 
			
		|||
# Constructing a DataFrame with no data yields an empty frame
# (zero rows, zero columns).
import pandas as pd

df = pd.DataFrame(data=None)
print("Create empty data frame")
print(df)
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,5 @@
 | 
			
		|||
# Wrapping a Series in a DataFrame produces a single-column frame whose
# column name is the Series' name (here the default 0).
import pandas as pd

letters = ['a', 'b', 'c', 'd']
s = pd.Series(letters)
df = pd.DataFrame(s)
print("Create data frame from series")
print(df)
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,9 @@
 | 
			
		|||
# Build a DataFrame from a dict of Series: the dict keys become the
# column names and the Series become the columns.
import pandas as pd

name = pd.Series(['Bob', 'Sam'])
team = pd.Series(['Wild Bunch', 'Sleepy Team'])
dic = dict(Name=name, Team=team)

df = pd.DataFrame(dic)

print("Create data frame from a dictionary")
print(df)
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,23 @@
 | 
			
		|||
# Build a DataFrame from a list of record dicts, then traverse it
# row-wise with iterrows() and column-wise with items().
import pandas as pd

namesList = [
    {'FirstName': "Bob", 'LastName': "Smith"},
    {'FirstName': "Rusty", 'LastName': "Jones"},
    {'FirstName': "Tanner", 'LastName': "Golden"},
    {'FirstName': "Harry", 'LastName': "Chinook"},
]
df = pd.DataFrame(namesList)
print(df)

# Row-wise: iterrows() yields (index, row-as-Series) pairs.
print(" ========== Row wise data =========")
for row_index, row_value in df.iterrows():
    print("\n Row index is :", row_index)
    print("\n Row Value is: ", row_value)

print("\n")
# Column-wise: items() yields (column-name, column-as-Series) pairs;
# it replaced the deprecated iteritems().
print(" ========== Column wise data =========")
for col_index, col_value in df.items():
    print("\n Col index is :", col_index)
    print("\n Col Value is: ", col_value)
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,5 @@
 | 
			
		|||
# A DataFrame constructed with no arguments has zero rows and zero
# columns; printing it shows "Empty DataFrame".
import pandas as pd

empty_frame = pd.DataFrame()
df = empty_frame

print("Create empty data frame")
print(df)
 | 
			
		||||
							
								
								
									
										23
									
								
								bigos/lecture20250429/3PrintAndDescribe/CreateDataFrame04.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								bigos/lecture20250429/3PrintAndDescribe/CreateDataFrame04.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,23 @@
 | 
			
		|||
# Demonstrate the two iteration axes of a DataFrame built from a list
# of dict records: rows via iterrows(), columns via items().
import pandas as pd

people = [
    ("Bob", "Smith"),
    ("Rusty", "Jones"),
    ("Tanner", "Golden"),
    ("Harry", "Chinook"),
]
namesList = [{'FirstName': first, 'LastName': last} for first, last in people]
df = pd.DataFrame(namesList)
print(df)

# Walk the frame one row at a time.
print(" ========== Row wise data =========")
for row_index, row_value in df.iterrows():
    print("\n Row index is :", row_index)
    print("\n Row Value is: ", row_value)

print("\n")
# Walk the frame one column at a time (items() supersedes iteritems()).
print(" ========== Column wise data =========")
for col_index, col_value in df.items():
    print("\n Col index is :", col_index)
    print("\n Col Value is: ", col_value)
 | 
			
		||||
							
								
								
									
										7
									
								
								bigos/lecture20250429/3PrintAndDescribe/Describe01.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								bigos/lecture20250429/3PrintAndDescribe/Describe01.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,7 @@
 | 
			
		|||
# Statistics Summary of a Series: describe() reports count, mean, std,
# min, the quartiles, and max in a single call.
import pandas as pd

s1 = pd.Series(range(10, 51, 10))  # 10, 20, 30, 40, 50
print("Original\n", s1)
print("Describe")
print(s1.describe())
 | 
			
		||||
							
								
								
									
										32
									
								
								bigos/lecture20250429/4WebscrapeLocal/index.html
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								bigos/lecture20250429/4WebscrapeLocal/index.html
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,32 @@
 | 
			
		|||
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Example HTML Table</title>
</head>
<body>

<h2>Sample HTML Table</h2>

<!-- Single 3x3 table (one header row, two data rows). Opened as
     'index.html' by the webscrapeTest*.py scripts and parsed with
     pandas.read_html(). -->
<table border="1">
    <tr>
        <th>Header 1</th>
        <th>Header 2</th>
        <th>Header 3</th>
    </tr>
    <tr>
        <td>Data 1-1</td>
        <td>Data 1-2</td>
        <td>Data 1-3</td>
    </tr>
    <tr>
        <td>Data 2-1</td>
        <td>Data 2-2</td>
        <td>Data 2-3</td>
    </tr>
</table>

</body>
</html>
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										50
									
								
								bigos/lecture20250429/4WebscrapeLocal/index2.html
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								bigos/lecture20250429/4WebscrapeLocal/index2.html
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,50 @@
 | 
			
		|||
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Example HTML Table</title>
</head>
<body>

<h2>Sample HTML Table</h2>

<!-- Two 3x3 tables; with two tables on the page, pandas.read_html()
     returns a list of two DataFrames, exercising the "all tables"
     loop in webscrapeTest02.py. -->
<table border="1">
    <tr>
        <th>Header 1</th>
        <th>Header 2</th>
        <th>Header 3</th>
    </tr>
    <tr>
        <td>Data 1-1</td>
        <td>Data 1-2</td>
        <td>Data 1-3</td>
    </tr>
    <tr>
        <td>Data 2-1</td>
        <td>Data 2-2</td>
        <td>Data 2-3</td>
    </tr>
</table>
<br/>
<!-- Second table: different headers, same data cells as the first. -->
<table border="1">
    <tr>
        <th>Header 4</th>
        <th>Header 5</th>
        <th>Header 6</th>
    </tr>
    <tr>
        <td>Data 1-1</td>
        <td>Data 1-2</td>
        <td>Data 1-3</td>
    </tr>
    <tr>
        <td>Data 2-1</td>
        <td>Data 2-2</td>
        <td>Data 2-3</td>
    </tr>
</table>

</body>
</html>
			
		||||
							
								
								
									
										17
									
								
								bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,17 @@
 | 
			
		|||
# Extract tables from a local HTML file with BeautifulSoup + pandas.
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/

import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO

# Parse the local HTML file.
with open('index.html') as file:
    soup = BeautifulSoup(file, 'html.parser')

# FIX: pandas deprecated passing literal HTML to read_html(); wrap the
# markup in a StringIO object (same approach as webscrapeTest02.py).
tables = pd.read_html(StringIO(str(soup)))

# read_html returns a list of DataFrames, one per <table>; take the first.
table = tables[0]
print(table)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										44
									
								
								bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01X.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01X.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,44 @@
 | 
			
		|||
# Fetch a web page and extract its tables with BeautifulSoup + pandas.
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/

import pandas as pd
from bs4 import BeautifulSoup

# Library for opening url and creating requests
import urllib.request
import ssl

# pretty-print python data structures
from pprint import pprint

from io import StringIO


def url_get_contents(url):
    """Open a website and return its binary contents (HTTP response body)."""
    # making request to the website
    req = urllib.request.Request(url=url)
    f = urllib.request.urlopen(req)
    # reading contents of the website
    return f.read()


# NOTE(review): disabling certificate verification is insecure; it is
# tolerable only for this classroom demo against a known host.
# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error
ssl._create_default_https_context = ssl._create_unverified_context

webpage = url_get_contents('https://cset2.stcc.edu/~csetuser/pandas/index.html')

# Parse the fetched markup into a list of pandas DataFrames.
soup = BeautifulSoup(webpage, 'html.parser')
# FIX: wrap the literal HTML in StringIO -- passing a raw string to
# read_html() is deprecated in pandas.
tables = pd.read_html(StringIO(str(soup)))

# First table on the page.
table = tables[0]
print(table)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										45
									
								
								bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01Y.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01Y.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,45 @@
 | 
			
		|||
# Scrape the first table of a web page into a pandas DataFrame.
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/

import pandas as pd
from bs4 import BeautifulSoup

# Library for opening url and creating requests
import urllib.request
import ssl

# pretty-print python data structures
from pprint import pprint

from io import StringIO


def url_get_contents(url):
    """Return the raw bytes of the HTTP response body for *url*."""
    request = urllib.request.Request(url=url)
    response = urllib.request.urlopen(request)
    return response.read()


# Work around certificate-verification failures on this host:
# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error
ssl._create_default_https_context = ssl._create_unverified_context

webpage = url_get_contents('https://cset2.stcc.edu/~csetuser/pandas/index.html')

# Parse the markup, then hand it to pandas wrapped in StringIO
# (read_html no longer accepts a literal HTML string).
soup = BeautifulSoup(webpage, 'html.parser')
tables = pd.read_html(StringIO(str(soup)))

# First table in the document.
table = tables[0]
print(table)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										23
									
								
								bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,23 @@
 | 
			
		|||
# Extract every table from a local HTML file into pandas DataFrames.
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/
# https://search.brave.com/search?q=Passing+literal+html+to+%27read_html%27+is+deprecated+and+will+be+removed+in+a+future+version.+To+read+from+a+literal+string%2C+wrap+it+in+a+%27StringIO%27+object.+tables+%3D+pd.read_html%28str%28soup%29%29&source=desktop&summary=1&conversation=212132c0d053324192dd99
# https://search.brave.com/search?q=python3+StringIO&source=desktop&summary=1&conversation=784ebc2d4f47f7464a9ca6

import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO

# Read and parse the local HTML file.
with open('index.html') as file:
    markup = file.read()
soup = BeautifulSoup(markup, 'html.parser')

# StringIO wrapper: read_html() no longer accepts a literal HTML string.
tables = pd.read_html(StringIO(str(soup)))

# First table in the document.
table = tables[0]
print(table)

# Then show every table read_html() found.
for i, table in enumerate(tables):
    print(f"Table {i + 1}:\n{table}\n")
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										42
									
								
								bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02Y.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02Y.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,42 @@
 | 
			
		|||
# Fetch a web page over HTTPS and print every table it contains.
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/
# https://search.brave.com/search?q=Passing+literal+html+to+%27read_html%27+is+deprecated+and+will+be+removed+in+a+future+version.+To+read+from+a+literal+string%2C+wrap+it+in+a+%27StringIO%27+object.+tables+%3D+pd.read_html%28str%28soup%29%29&source=desktop&summary=1&conversation=212132c0d053324192dd99
# https://search.brave.com/search?q=python3+StringIO&source=desktop&summary=1&conversation=784ebc2d4f47f7464a9ca6

import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO

import urllib.request
import ssl


def url_get_contents(url):
    """Return the raw bytes of the HTTP response body for *url*."""
    request = urllib.request.Request(url=url)
    response = urllib.request.urlopen(request)
    return response.read()


# Work around certificate-verification failures on this host:
# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error
ssl._create_default_https_context = ssl._create_unverified_context

webpage = url_get_contents('https://cset2.stcc.edu/~csetuser/pandas/index.html')
soup = BeautifulSoup(webpage, 'html.parser')
# StringIO wrapper: read_html() no longer accepts a literal HTML string.
tables = pd.read_html(StringIO(str(soup)))

# First table on the page.
table = tables[0]
print(table)

# Then every table read_html() found.
for i, table in enumerate(tables):
    print(f"Table {i + 1}:\n{table}\n")
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										42
									
								
								bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02Z.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02Z.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,42 @@
 | 
			
		|||
# Scrape the STCC employee-directory page and print each table found.
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/
# https://search.brave.com/search?q=Passing+literal+html+to+%27read_html%27+is+deprecated+and+will+be+removed+in+a+future+version.+To+read+from+a+literal+string%2C+wrap+it+in+a+%27StringIO%27+object.+tables+%3D+pd.read_html%28str%28soup%29%29&source=desktop&summary=1&conversation=212132c0d053324192dd99
# https://search.brave.com/search?q=python3+StringIO&source=desktop&summary=1&conversation=784ebc2d4f47f7464a9ca6

import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO

import urllib.request
import ssl


def url_get_contents(url):
    """Return the raw bytes of the HTTP response body for *url*."""
    request = urllib.request.Request(url=url)
    response = urllib.request.urlopen(request)
    return response.read()


# Work around certificate-verification failures on this host:
# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error
ssl._create_default_https_context = ssl._create_unverified_context

webpage = url_get_contents('https://www.stcc.edu/about-stcc/employee-directory/')
soup = BeautifulSoup(webpage, 'html.parser')
# StringIO wrapper: read_html() no longer accepts a literal HTML string.
tables = pd.read_html(StringIO(str(soup)))

# First table on the page.
table = tables[0]
print(table)

# Then every table read_html() found.
for i, table in enumerate(tables):
    print(f"Table {i + 1}:\n{table}\n")
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										4
									
								
								bigos/lecture20250429/Links/Links.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								bigos/lecture20250429/Links/Links.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,4 @@
 | 
			
		|||
 | 
			
		||||
DataFrame vs Series in Pandas
 | 
			
		||||
https://www.geeksforgeeks.org/dataframe-vs-series-in-pandas/
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue