Added Pandas code 4-29-2025
This commit is contained in: parent 883afde41f, commit 83c99e9695
22 changed files with 410 additions and 0 deletions
bigos/lecture20250429/1CreateBasicSeries/hack.py (new file, 9 lines)
@@ -0,0 +1,9 @@
import pandas as pd
s1 = pd.Series([1.25,1.75,2.25,2.75,3.25],index=['a','b','c','d','e'])
print(s1)
s2 = pd.Series([6,11.2,4,2,1.1],index=['a','b','c','d','e'])
print(s2)
s3 = s1+s2
print(s3)
s4 = s1*s2
print(s4)
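Note on hack.py: arithmetic between two Series aligns on index labels, not position, which is why s1 and s2 are given the same index here. A minimal sketch (not part of the commit) of what happens when the labels differ:

import pandas as pd

a = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
b = pd.Series([10, 20, 30], index=['b', 'c', 'd'])

# Labels present in only one Series come back as NaN in the result
print(a + b)   # a: NaN, b: 12.0, c: 23.0, d: NaN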
bigos/lecture20250429/1CreateBasicSeries/series01.py (new file, 3 lines)
@@ -0,0 +1,3 @@
import pandas as pd
s1 = pd.Series([1.25,1.75,2.25,2.75,3.25])
print(s1)
bigos/lecture20250429/1CreateBasicSeries/series02.py (new file, 3 lines)
@@ -0,0 +1,3 @@
import pandas as pd
s1 = pd.Series([1.25,1.75,2.25,2.75,3.25],index=['a','b','c','d','e'])
print(s1)
bigos/lecture20250429/1CreateBasicSeries/series03.py (new file, 5 lines)
@@ -0,0 +1,5 @@
import pandas as pd
s1 = pd.Series([1.25,1.75,2.25,2.75,3.25],index=['a','b','c','d','e'])
print(s1)
s3 = s1+5
print(s3)
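series03.py shows scalar broadcasting: the scalar is applied element-wise and the index is preserved. The same holds for the other arithmetic operators (a quick sketch, not in the commit):

import pandas as pd

s1 = pd.Series([1.25, 1.75, 2.25], index=['a', 'b', 'c'])
print(s1 * 2)   # every value doubled, index unchanged
print(s1 ** 2)  # element-wise square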
bigos/lecture20250429/1CreateBasicSeries/series04.py (new file, 5 lines)
@@ -0,0 +1,5 @@
import pandas as pd
s1 = pd.Series([1.25,1.75,2.25,2.75,3.25])
print(s1)
s2 = s1[s1>2]
print(s2)
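series04.py filters with a boolean mask. Masks can also be combined with & and |, with each condition parenthesized because of operator precedence (a sketch, not part of the commit):

import pandas as pd

s = pd.Series([1.25, 1.75, 2.25, 2.75, 3.25])
mask = (s > 1.5) & (s < 3)   # element-wise AND; parentheses required
print(s[mask])               # keeps 1.75, 2.25, 2.75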
@@ -0,0 +1,9 @@
import pandas as pd
import numpy as np
arr = np.array([10,20,30,40,50])
s = pd.Series(arr)
print(arr)
print(s)
t = np.sqrt(s)
print(t)
print(type(t))
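NumPy ufuncs applied to a Series return a Series with the index intact, which is why np.sqrt(s) above stays a pandas object. A minimal sketch (not in the commit) making the round trip explicit:

import numpy as np
import pandas as pd

s = pd.Series([10, 20, 30], index=['x', 'y', 'z'])
t = np.sqrt(s)            # the ufunc dispatches through the Series
print(type(t))            # <class 'pandas.core.series.Series'>
print(t.index.tolist())   # ['x', 'y', 'z'], index preserved
print(t.to_numpy())       # back to a plain ndarray when needed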
@@ -0,0 +1,5 @@
import pandas as pd

df = pd.DataFrame()
print("Create empty data frame")
print(df)
@@ -0,0 +1,5 @@
import pandas as pd
s = pd.Series(['a','b','c','d'])
df = pd.DataFrame(s)
print("Create data frame from series")
print(df)
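A single unnamed Series becomes one DataFrame column labeled 0. To control the column name, one option (a sketch, not in the commit) is Series.to_frame:

import pandas as pd

s = pd.Series(['a', 'b', 'c', 'd'])
df = s.to_frame('letter')   # one column named 'letter' instead of 0
print(df)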
@@ -0,0 +1,9 @@
import pandas as pd
name = pd.Series(['Bob','Sam'])
team = pd.Series(['Wild Bunch','Sleepy Team'])
dic = {'Name':name,'Team':team}

df = pd.DataFrame(dic)

print("Create data frame from a dictionary")
print(df)
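When a DataFrame is built from a dict of Series, the keys become the column names and the Series indexes are aligned (union of labels, NaN where a label is missing). A minimal sketch (not in the commit):

import pandas as pd

name = pd.Series(['Bob', 'Sam'], index=[0, 1])
age = pd.Series([30], index=[0])   # shorter column
df = pd.DataFrame({'Name': name, 'Age': age})
print(df)   # Sam's Age is NaN: indexes were aligned, not zipped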
@@ -0,0 +1,23 @@
import pandas as pd

namesList = [{'FirstName': "Bob", 'LastName': "Smith"},
             {'FirstName': "Rusty", 'LastName': "Jones"},
             {'FirstName': "Tanner", 'LastName': "Golden"},
             {'FirstName': "Harry", 'LastName': "Chinook"}
             ]
df = pd.DataFrame(namesList)
print(df)

# Access data row wise using iterrows()
print(" ========== Row wise data =========")
for (row_index, row_value) in df.iterrows():
    print("\n Row index is :", row_index)
    print("\n Row Value is: ", row_value)

print("\n")
# Access data column wise using items()
print(" ========== Column wise data =========")
# for (col_index, col_value) in df.iteritems():   # removed in pandas 2.0
for (col_index, col_value) in df.items():
    print("\n Col index is :", col_index)
    print("\n Col Value is: ", col_value)
@@ -0,0 +1,5 @@
import pandas as pd

df = pd.DataFrame()
print("Create empty data frame")
print(df)
bigos/lecture20250429/3PrintAndDescribe/CreateDataFrame04.py (new file, 23 lines)
@@ -0,0 +1,23 @@
import pandas as pd

namesList = [{'FirstName': "Bob", 'LastName': "Smith"},
             {'FirstName': "Rusty", 'LastName': "Jones"},
             {'FirstName': "Tanner", 'LastName': "Golden"},
             {'FirstName': "Harry", 'LastName': "Chinook"}
             ]
df = pd.DataFrame(namesList)
print(df)

# Access data row wise using iterrows()
print(" ========== Row wise data =========")
for (row_index, row_value) in df.iterrows():
    print("\n Row index is :", row_index)
    print("\n Row Value is: ", row_value)

print("\n")
# Access data column wise using items()
print(" ========== Column wise data =========")
# for (col_index, col_value) in df.iteritems():   # removed in pandas 2.0
for (col_index, col_value) in df.items():
    print("\n Col index is :", col_index)
    print("\n Col Value is: ", col_value)
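The commented-out line records an API change: DataFrame.iteritems() was deprecated and removed in pandas 2.0 in favor of items(). For row access, itertuples() is generally faster than iterrows() because it avoids building a Series per row. A sketch (not in the commit):

import pandas as pd

df = pd.DataFrame([{'FirstName': 'Bob', 'LastName': 'Smith'},
                   {'FirstName': 'Rusty', 'LastName': 'Jones'}])

# Each row arrives as a namedtuple: attribute access, no per-row Series
for row in df.itertuples(index=True):
    print(row.Index, row.FirstName, row.LastName)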
bigos/lecture20250429/3PrintAndDescribe/Describe01.py (new file, 7 lines)
@@ -0,0 +1,7 @@
# Statistics Summary of a Series
import pandas as pd

s1 = pd.Series([10,20,30,40,50])
print("Original\n", s1)
print("Describe")
print(s1.describe())
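For a numeric Series, describe() returns count, mean, std, min, the 25/50/75% quartiles, and max as a new Series, so individual statistics can be pulled out by label. A sketch (not in the commit):

import pandas as pd

s = pd.Series([10, 20, 30, 40, 50])
stats = s.describe()
print(stats['mean'])   # 30.0
print(stats['50%'])    # 30.0 (the median)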
bigos/lecture20250429/4WebscrapeLocal/index.html (new file, 32 lines)
@@ -0,0 +1,32 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Example HTML Table</title>
</head>
<body>

<h2>Sample HTML Table</h2>

<table border="1">
    <tr>
        <th>Header 1</th>
        <th>Header 2</th>
        <th>Header 3</th>
    </tr>
    <tr>
        <td>Data 1-1</td>
        <td>Data 1-2</td>
        <td>Data 1-3</td>
    </tr>
    <tr>
        <td>Data 2-1</td>
        <td>Data 2-2</td>
        <td>Data 2-3</td>
    </tr>
</table>

</body>
</html>
bigos/lecture20250429/4WebscrapeLocal/index2.html (new file, 50 lines)
@@ -0,0 +1,50 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Example HTML Table</title>
</head>
<body>

<h2>Sample HTML Table</h2>

<table border="1">
    <tr>
        <th>Header 1</th>
        <th>Header 2</th>
        <th>Header 3</th>
    </tr>
    <tr>
        <td>Data 1-1</td>
        <td>Data 1-2</td>
        <td>Data 1-3</td>
    </tr>
    <tr>
        <td>Data 2-1</td>
        <td>Data 2-2</td>
        <td>Data 2-3</td>
    </tr>
</table>
<br/>
<table border="1">
    <tr>
        <th>Header 4</th>
        <th>Header 5</th>
        <th>Header 6</th>
    </tr>
    <tr>
        <td>Data 1-1</td>
        <td>Data 1-2</td>
        <td>Data 1-3</td>
    </tr>
    <tr>
        <td>Data 2-1</td>
        <td>Data 2-2</td>
        <td>Data 2-3</td>
    </tr>
</table>

</body>
</html>
bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01.py (new file, 17 lines)
@@ -0,0 +1,17 @@
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/


import pandas as pd
from bs4 import BeautifulSoup

# Parse the HTML file and read its tables into a list of DataFrames
with open('index.html') as file:
    soup = BeautifulSoup(file, 'html.parser')
    tables = pd.read_html(str(soup))

# Extract the first table from the list
table = tables[0]
print(table)
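Two practical notes on this file: pandas.read_html needs an HTML parser backend installed (lxml, or html5lib plus beautifulsoup4), and newer pandas versions emit a FutureWarning when handed literal HTML as a string; the later files in this commit switch to StringIO for exactly that reason. For a simple local file, read_html can also skip BeautifulSoup entirely (a sketch, not the commit's approach):

import pandas as pd

# read_html accepts a path, URL, or file-like object directly;
# requires lxml (or html5lib + beautifulsoup4) to be installed
tables = pd.read_html('index.html')
print(tables[0])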
bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01X.py (new file, 44 lines)
@@ -0,0 +1,44 @@
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/

import pandas as pd
from bs4 import BeautifulSoup

# Library for opening urls and creating requests
import urllib.request
import ssl

# pretty-print python data structures
from pprint import pprint


# Open a website and read its
# binary contents (HTTP Response Body)
def url_get_contents(url):

    # making a request to the website
    req = urllib.request.Request(url=url)
    f = urllib.request.urlopen(req)

    # reading the contents of the website
    return f.read()

# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error
ssl._create_default_https_context = ssl._create_unverified_context

webpage = url_get_contents('https://cset2.stcc.edu/~csetuser/pandas/index.html')

# Parse the HTML and read its tables into a list of DataFrames
soup = BeautifulSoup(webpage, 'html.parser')
tables = pd.read_html(str(soup))

# Extract the first table from the list
table = tables[0]
print(table)
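Assigning ssl._create_default_https_context = ssl._create_unverified_context works around the CERTIFICATE_VERIFY_FAILED error but disables certificate verification for every HTTPS request in the process. A more contained alternative (a sketch, assuming the certifi package is installed) passes an explicit context to urlopen:

import ssl
import urllib.request

import certifi

# Verify against certifi's CA bundle instead of disabling checks
ctx = ssl.create_default_context(cafile=certifi.where())
with urllib.request.urlopen('https://example.com/', context=ctx) as f:
    body = f.read()
print(len(body))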
bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01Y.py (new file, 45 lines)
@@ -0,0 +1,45 @@
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/

import pandas as pd
from bs4 import BeautifulSoup

# Library for opening urls and creating requests
import urllib.request
import ssl

# pretty-print python data structures
from pprint import pprint

from io import StringIO

# Open a website and read its
# binary contents (HTTP Response Body)
def url_get_contents(url):

    # making a request to the website
    req = urllib.request.Request(url=url)
    f = urllib.request.urlopen(req)

    # reading the contents of the website
    return f.read()

# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error
ssl._create_default_https_context = ssl._create_unverified_context

webpage = url_get_contents('https://cset2.stcc.edu/~csetuser/pandas/index.html')

# Parse the HTML and read its tables into a list of DataFrames;
# StringIO avoids the FutureWarning about passing literal HTML
soup = BeautifulSoup(webpage, 'html.parser')
tables = pd.read_html(StringIO(str(soup)))

# Extract the first table from the list
table = tables[0]
print(table)
bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02.py (new file, 23 lines)
@@ -0,0 +1,23 @@
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/
# https://search.brave.com/search?q=Passing+literal+html+to+%27read_html%27+is+deprecated+and+will+be+removed+in+a+future+version.+To+read+from+a+literal+string%2C+wrap+it+in+a+%27StringIO%27+object.+tables+%3D+pd.read_html%28str%28soup%29%29&source=desktop&summary=1&conversation=212132c0d053324192dd99
# https://search.brave.com/search?q=python3+StringIO&source=desktop&summary=1&conversation=784ebc2d4f47f7464a9ca6


import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO

# Parse the HTML file and read its tables into a list of DataFrames
with open('index.html') as file:
    soup = BeautifulSoup(file, 'html.parser')
    tables = pd.read_html(StringIO(str(soup)))

# Extract the first table from the list
table = tables[0]
print(table)

# Print all tables from the list
for i, table in enumerate(tables):
    print(f"Table {i + 1}:\n{table}\n")
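Since index.html contains a single table, the loop above prints one entry; pointing open() at index2.html (also added in this commit) yields two. A quick way to pick a specific table by its header text is read_html's match parameter (a sketch, not in the commit):

import pandas as pd

# Only tables whose text matches the regex are returned;
# requires lxml or html5lib, as before
tables = pd.read_html('index2.html', match='Header 4')
print(len(tables))   # 1, just the second table
print(tables[0])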
bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02Y.py (new file, 42 lines)
@@ -0,0 +1,42 @@
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/
# https://search.brave.com/search?q=Passing+literal+html+to+%27read_html%27+is+deprecated+and+will+be+removed+in+a+future+version.+To+read+from+a+literal+string%2C+wrap+it+in+a+%27StringIO%27+object.+tables+%3D+pd.read_html%28str%28soup%29%29&source=desktop&summary=1&conversation=212132c0d053324192dd99
# https://search.brave.com/search?q=python3+StringIO&source=desktop&summary=1&conversation=784ebc2d4f47f7464a9ca6


import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO

import urllib.request
import ssl

# Open a website and read its
# binary contents (HTTP Response Body)
def url_get_contents(url):

    # making a request to the website
    req = urllib.request.Request(url=url)
    f = urllib.request.urlopen(req)

    # reading the contents of the website
    return f.read()

# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error
ssl._create_default_https_context = ssl._create_unverified_context

webpage = url_get_contents('https://cset2.stcc.edu/~csetuser/pandas/index.html')
soup = BeautifulSoup(webpage, 'html.parser')
tables = pd.read_html(StringIO(str(soup)))

# Extract the first table from the list
table = tables[0]
print(table)

# Print all tables from the list
for i, table in enumerate(tables):
    print(f"Table {i + 1}:\n{table}\n")
bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02Z.py (new file, 42 lines)
@@ -0,0 +1,42 @@
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/
# https://search.brave.com/search?q=Passing+literal+html+to+%27read_html%27+is+deprecated+and+will+be+removed+in+a+future+version.+To+read+from+a+literal+string%2C+wrap+it+in+a+%27StringIO%27+object.+tables+%3D+pd.read_html%28str%28soup%29%29&source=desktop&summary=1&conversation=212132c0d053324192dd99
# https://search.brave.com/search?q=python3+StringIO&source=desktop&summary=1&conversation=784ebc2d4f47f7464a9ca6


import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO

import urllib.request
import ssl

# Open a website and read its
# binary contents (HTTP Response Body)
def url_get_contents(url):

    # making a request to the website
    req = urllib.request.Request(url=url)
    f = urllib.request.urlopen(req)

    # reading the contents of the website
    return f.read()

# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error
ssl._create_default_https_context = ssl._create_unverified_context

webpage = url_get_contents('https://www.stcc.edu/about-stcc/employee-directory/')
soup = BeautifulSoup(webpage, 'html.parser')
tables = pd.read_html(StringIO(str(soup)))

# Extract the first table from the list
table = tables[0]
print(table)

# Print all tables from the list
for i, table in enumerate(tables):
    print(f"Table {i + 1}:\n{table}\n")
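webscrapeTest02Z.py points the same code at a live page, where the number and shape of the tables is outside our control, so the tables[0] access is worth guarding. A defensive sketch (hypothetical, not part of the commit):

import pandas as pd

try:
    tables = pd.read_html('https://www.stcc.edu/about-stcc/employee-directory/')
except ValueError:
    # read_html raises ValueError when no <table> is found
    tables = []

if tables:
    print(f"Found {len(tables)} table(s); first has shape {tables[0].shape}")
else:
    print("No tables found on the page")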
bigos/lecture20250429/Links/Links.txt (new file, 4 lines)
@@ -0,0 +1,4 @@
DataFrame vs Series in Pandas
https://www.geeksforgeeks.org/dataframe-vs-series-in-pandas/