Added Pandas code 4-29-2025
This commit is contained in: parent 883afde41f, commit 83c99e9695
22 changed files with 410 additions and 0 deletions
bigos/lecture20250429/1CreateBasicSeries/hack.py (new file, 9 lines)
@@ -0,0 +1,9 @@
import pandas as pd
s1 = pd.Series([1.25,1.75,2.25,2.75,3.25],index=['a','b','c','d','e'])
print(s1)
s2 = pd.Series([6,11.2,4,2,1.1],index=['a','b','c','d','e'])
print(s2)
s3 = s1+s2
print(s3)
s4 = s1*s2
print(s4)
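Note on hack.py: arithmetic between two Series aligns on index labels, not position, which is why s1 and s2 are given the same index here. A minimal sketch (not part of the commit) of what happens when the labels differ:

import pandas as pd

a = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
b = pd.Series([10, 20, 30], index=['b', 'c', 'd'])

# Labels present in only one Series come back as NaN in the result
print(a + b)   # a: NaN, b: 12.0, c: 23.0, d: NaN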
bigos/lecture20250429/1CreateBasicSeries/series01.py (new file, 3 lines)
@@ -0,0 +1,3 @@
import pandas as pd
s1 = pd.Series([1.25,1.75,2.25,2.75,3.25])
print(s1)
bigos/lecture20250429/1CreateBasicSeries/series02.py (new file, 3 lines)
@@ -0,0 +1,3 @@
import pandas as pd
s1 = pd.Series([1.25,1.75,2.25,2.75,3.25],index=['a','b','c','d','e'])
print(s1)
bigos/lecture20250429/1CreateBasicSeries/series03.py (new file, 5 lines)
@@ -0,0 +1,5 @@
import pandas as pd
s1 = pd.Series([1.25,1.75,2.25,2.75,3.25],index=['a','b','c','d','e'])
print(s1)
s3 = s1+5
print(s3)
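series03.py shows scalar broadcasting: the scalar is applied element-wise and the index is preserved. The same holds for the other arithmetic operators (a quick sketch, not in the commit):

import pandas as pd

s1 = pd.Series([1.25, 1.75, 2.25], index=['a', 'b', 'c'])
print(s1 * 2)   # every value doubled, index unchanged
print(s1 ** 2)  # element-wise square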
bigos/lecture20250429/1CreateBasicSeries/series04.py (new file, 5 lines)
@@ -0,0 +1,5 @@
import pandas as pd
s1 = pd.Series([1.25,1.75,2.25,2.75,3.25])
print(s1)
s2 = s1[s1>2]
print(s2)
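series04.py filters with a boolean mask. Masks can also be combined with & and |, with each condition parenthesized because of operator precedence (a sketch, not part of the commit):

import pandas as pd

s = pd.Series([1.25, 1.75, 2.25, 2.75, 3.25])
mask = (s > 1.5) & (s < 3)   # element-wise AND; parentheses required
print(s[mask])               # keeps 1.75, 2.25, 2.75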
@@ -0,0 +1,9 @@
import pandas as pd
import numpy as np
arr = np.array([10,20,30,40,50])
s = pd.Series(arr)
print(arr)
print(s)
t = np.sqrt(s)
print(t)
print(type(t))
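NumPy ufuncs applied to a Series return a Series with the index intact, which is why np.sqrt(s) above stays a pandas object. A minimal sketch (not in the commit) making the round trip explicit:

import numpy as np
import pandas as pd

s = pd.Series([10, 20, 30], index=['x', 'y', 'z'])
t = np.sqrt(s)            # the ufunc dispatches through the Series
print(type(t))            # <class 'pandas.core.series.Series'>
print(t.index.tolist())   # ['x', 'y', 'z'], index preserved
print(t.to_numpy())       # back to a plain ndarray when needed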
@@ -0,0 +1,5 @@
import pandas as pd

df = pd.DataFrame()
print("Create empty data frame")
print(df)
@@ -0,0 +1,5 @@
import pandas as pd
s = pd.Series(['a','b','c','d'])
df = pd.DataFrame(s)
print("Create data frame from series")
print(df)
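A single unnamed Series becomes one DataFrame column labeled 0. To control the column name, one option (a sketch, not in the commit) is Series.to_frame:

import pandas as pd

s = pd.Series(['a', 'b', 'c', 'd'])
df = s.to_frame('letter')   # one column named 'letter' instead of 0
print(df)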
@@ -0,0 +1,9 @@
import pandas as pd
name = pd.Series(['Bob','Sam'])
team = pd.Series(['Wild Bunch','Sleepy Team'])
dic = {'Name':name,'Team':team}

df = pd.DataFrame(dic)

print("Create data frame from a dictionary")
print(df)
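When a DataFrame is built from a dict of Series, the keys become the column names and the Series indexes are aligned (union of labels, NaN where a label is missing). A minimal sketch (not in the commit):

import pandas as pd

name = pd.Series(['Bob', 'Sam'], index=[0, 1])
age = pd.Series([30], index=[0])   # shorter column
df = pd.DataFrame({'Name': name, 'Age': age})
print(df)   # Sam's Age is NaN: indexes were aligned, not zipped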
@@ -0,0 +1,23 @@
import pandas as pd

namesList = [{'FirstName': "Bob", 'LastName': "Smith"},
             {'FirstName': "Rusty", 'LastName': "Jones"},
             {'FirstName': "Tanner", 'LastName': "Golden"},
             {'FirstName': "Harry", 'LastName': "Chinook"}
             ]
df = pd.DataFrame(namesList)
print(df)

# Access data row wise using iterrows()
print(" ========== Row wise data =========")
for (row_index, row_value) in df.iterrows():
    print("\n Row index is :", row_index)
    print("\n Row Value is: ", row_value)

print("\n")
# Access data column wise using items()
print(" ========== Column wise data =========")
# for (col_index, col_value) in df.iteritems():   # removed in pandas 2.0
for (col_index, col_value) in df.items():
    print("\n Col index is :", col_index)
    print("\n Col Value is: ", col_value)
@@ -0,0 +1,5 @@
import pandas as pd

df = pd.DataFrame()
print("Create empty data frame")
print(df)
bigos/lecture20250429/3PrintAndDescribe/CreateDataFrame04.py (new file, 23 lines)
@@ -0,0 +1,23 @@
import pandas as pd

namesList = [{'FirstName': "Bob", 'LastName': "Smith"},
             {'FirstName': "Rusty", 'LastName': "Jones"},
             {'FirstName': "Tanner", 'LastName': "Golden"},
             {'FirstName': "Harry", 'LastName': "Chinook"}
             ]
df = pd.DataFrame(namesList)
print(df)

# Access data row wise using iterrows()
print(" ========== Row wise data =========")
for (row_index, row_value) in df.iterrows():
    print("\n Row index is :", row_index)
    print("\n Row Value is: ", row_value)

print("\n")
# Access data column wise using items()
print(" ========== Column wise data =========")
# for (col_index, col_value) in df.iteritems():   # removed in pandas 2.0
for (col_index, col_value) in df.items():
    print("\n Col index is :", col_index)
    print("\n Col Value is: ", col_value)
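The commented-out line records an API change: DataFrame.iteritems() was deprecated and removed in pandas 2.0 in favor of items(). For row access, itertuples() is generally faster than iterrows() because it avoids building a Series per row. A sketch (not in the commit):

import pandas as pd

df = pd.DataFrame([{'FirstName': 'Bob', 'LastName': 'Smith'},
                   {'FirstName': 'Rusty', 'LastName': 'Jones'}])

# Each row arrives as a namedtuple: attribute access, no per-row Series
for row in df.itertuples(index=True):
    print(row.Index, row.FirstName, row.LastName)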
bigos/lecture20250429/3PrintAndDescribe/Describe01.py (new file, 7 lines)
@@ -0,0 +1,7 @@
# Statistics Summary of a Series
import pandas as pd

s1 = pd.Series([10,20,30,40,50])
print("Original\n", s1)
print("Describe")
print(s1.describe())
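For a numeric Series, describe() returns count, mean, std, min, the 25/50/75% quartiles, and max as a new Series, so individual statistics can be pulled out by label. A sketch (not in the commit):

import pandas as pd

s = pd.Series([10, 20, 30, 40, 50])
stats = s.describe()
print(stats['mean'])   # 30.0
print(stats['50%'])    # 30.0 (the median)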
bigos/lecture20250429/4WebscrapeLocal/index.html (new file, 32 lines)
@@ -0,0 +1,32 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Example HTML Table</title>
</head>
<body>

<h2>Sample HTML Table</h2>

<table border="1">
    <tr>
        <th>Header 1</th>
        <th>Header 2</th>
        <th>Header 3</th>
    </tr>
    <tr>
        <td>Data 1-1</td>
        <td>Data 1-2</td>
        <td>Data 1-3</td>
    </tr>
    <tr>
        <td>Data 2-1</td>
        <td>Data 2-2</td>
        <td>Data 2-3</td>
    </tr>
</table>

</body>
</html>
bigos/lecture20250429/4WebscrapeLocal/index2.html (new file, 50 lines)
@@ -0,0 +1,50 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Example HTML Table</title>
</head>
<body>

<h2>Sample HTML Table</h2>

<table border="1">
    <tr>
        <th>Header 1</th>
        <th>Header 2</th>
        <th>Header 3</th>
    </tr>
    <tr>
        <td>Data 1-1</td>
        <td>Data 1-2</td>
        <td>Data 1-3</td>
    </tr>
    <tr>
        <td>Data 2-1</td>
        <td>Data 2-2</td>
        <td>Data 2-3</td>
    </tr>
</table>
<br/>
<table border="1">
    <tr>
        <th>Header 4</th>
        <th>Header 5</th>
        <th>Header 6</th>
    </tr>
    <tr>
        <td>Data 1-1</td>
        <td>Data 1-2</td>
        <td>Data 1-3</td>
    </tr>
    <tr>
        <td>Data 2-1</td>
        <td>Data 2-2</td>
        <td>Data 2-3</td>
    </tr>
</table>

</body>
</html>
bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01.py (new file, 17 lines)
@@ -0,0 +1,17 @@
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/


import pandas as pd
from bs4 import BeautifulSoup

# Parse the HTML file and read its tables into a list of DataFrames
with open('index.html') as file:
    soup = BeautifulSoup(file, 'html.parser')
    tables = pd.read_html(str(soup))

# Extract the first table from the list
table = tables[0]
print(table)
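Two practical notes on this file: pandas.read_html needs an HTML parser backend installed (lxml, or html5lib plus beautifulsoup4), and newer pandas versions emit a FutureWarning when handed literal HTML as a string; the later files in this commit switch to StringIO for exactly that reason. For a simple local file, read_html can also skip BeautifulSoup entirely (a sketch, not the commit's approach):

import pandas as pd

# read_html accepts a path, URL, or file-like object directly;
# requires lxml (or html5lib + beautifulsoup4) to be installed
tables = pd.read_html('index.html')
print(tables[0])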
bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01X.py (new file, 44 lines)
@@ -0,0 +1,44 @@
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/

import pandas as pd
from bs4 import BeautifulSoup

# Library for opening urls and creating requests
import urllib.request
import ssl

# pretty-print python data structures
from pprint import pprint


# Open a website and read its
# binary contents (HTTP Response Body)
def url_get_contents(url):

    # making a request to the website
    req = urllib.request.Request(url=url)
    f = urllib.request.urlopen(req)

    # reading the contents of the website
    return f.read()

# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error
ssl._create_default_https_context = ssl._create_unverified_context

webpage = url_get_contents('https://cset2.stcc.edu/~csetuser/pandas/index.html')

# Parse the HTML and read its tables into a list of DataFrames
soup = BeautifulSoup(webpage, 'html.parser')
tables = pd.read_html(str(soup))

# Extract the first table from the list
table = tables[0]
print(table)
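Assigning ssl._create_default_https_context = ssl._create_unverified_context works around the CERTIFICATE_VERIFY_FAILED error but disables certificate verification for every HTTPS request in the process. A more contained alternative (a sketch, assuming the certifi package is installed) passes an explicit context to urlopen:

import ssl
import urllib.request

import certifi

# Verify against certifi's CA bundle instead of disabling checks
ctx = ssl.create_default_context(cafile=certifi.where())
with urllib.request.urlopen('https://example.com/', context=ctx) as f:
    body = f.read()
print(len(body))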
bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01Y.py (new file, 45 lines)
@@ -0,0 +1,45 @@
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/

import pandas as pd
from bs4 import BeautifulSoup

# Library for opening urls and creating requests
import urllib.request
import ssl

# pretty-print python data structures
from pprint import pprint

from io import StringIO

# Open a website and read its
# binary contents (HTTP Response Body)
def url_get_contents(url):

    # making a request to the website
    req = urllib.request.Request(url=url)
    f = urllib.request.urlopen(req)

    # reading the contents of the website
    return f.read()

# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error
ssl._create_default_https_context = ssl._create_unverified_context

webpage = url_get_contents('https://cset2.stcc.edu/~csetuser/pandas/index.html')

# Parse the HTML and read its tables into a list of DataFrames;
# StringIO avoids the FutureWarning about passing literal HTML
soup = BeautifulSoup(webpage, 'html.parser')
tables = pd.read_html(StringIO(str(soup)))

# Extract the first table from the list
table = tables[0]
print(table)
bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02.py (new file, 23 lines)
@@ -0,0 +1,23 @@
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/
# https://search.brave.com/search?q=Passing+literal+html+to+%27read_html%27+is+deprecated+and+will+be+removed+in+a+future+version.+To+read+from+a+literal+string%2C+wrap+it+in+a+%27StringIO%27+object.+tables+%3D+pd.read_html%28str%28soup%29%29&source=desktop&summary=1&conversation=212132c0d053324192dd99
# https://search.brave.com/search?q=python3+StringIO&source=desktop&summary=1&conversation=784ebc2d4f47f7464a9ca6


import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO

# Parse the HTML file and read its tables into a list of DataFrames
with open('index.html') as file:
    soup = BeautifulSoup(file, 'html.parser')
    tables = pd.read_html(StringIO(str(soup)))

# Extract the first table from the list
table = tables[0]
print(table)

# Print all tables from the list
for i, table in enumerate(tables):
    print(f"Table {i + 1}:\n{table}\n")
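Since index.html contains a single table, the loop above prints one entry; pointing open() at index2.html (also added in this commit) yields two. A quick way to pick a specific table by its header text is read_html's match parameter (a sketch, not in the commit):

import pandas as pd

# Only tables whose text matches the regex are returned;
# requires lxml or html5lib, as before
tables = pd.read_html('index2.html', match='Header 4')
print(len(tables))   # 1, just the second table
print(tables[0])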
bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02Y.py (new file, 42 lines)
@@ -0,0 +1,42 @@
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/
# https://search.brave.com/search?q=Passing+literal+html+to+%27read_html%27+is+deprecated+and+will+be+removed+in+a+future+version.+To+read+from+a+literal+string%2C+wrap+it+in+a+%27StringIO%27+object.+tables+%3D+pd.read_html%28str%28soup%29%29&source=desktop&summary=1&conversation=212132c0d053324192dd99
# https://search.brave.com/search?q=python3+StringIO&source=desktop&summary=1&conversation=784ebc2d4f47f7464a9ca6


import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO

import urllib.request
import ssl

# Open a website and read its
# binary contents (HTTP Response Body)
def url_get_contents(url):

    # making a request to the website
    req = urllib.request.Request(url=url)
    f = urllib.request.urlopen(req)

    # reading the contents of the website
    return f.read()

# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error
ssl._create_default_https_context = ssl._create_unverified_context

webpage = url_get_contents('https://cset2.stcc.edu/~csetuser/pandas/index.html')
soup = BeautifulSoup(webpage, 'html.parser')
tables = pd.read_html(StringIO(str(soup)))

# Extract the first table from the list
table = tables[0]
print(table)

# Print all tables from the list
for i, table in enumerate(tables):
    print(f"Table {i + 1}:\n{table}\n")
bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02Z.py (new file, 42 lines)
@@ -0,0 +1,42 @@
# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/
# https://search.brave.com/search?q=Passing+literal+html+to+%27read_html%27+is+deprecated+and+will+be+removed+in+a+future+version.+To+read+from+a+literal+string%2C+wrap+it+in+a+%27StringIO%27+object.+tables+%3D+pd.read_html%28str%28soup%29%29&source=desktop&summary=1&conversation=212132c0d053324192dd99
# https://search.brave.com/search?q=python3+StringIO&source=desktop&summary=1&conversation=784ebc2d4f47f7464a9ca6


import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO

import urllib.request
import ssl

# Open a website and read its
# binary contents (HTTP Response Body)
def url_get_contents(url):

    # making a request to the website
    req = urllib.request.Request(url=url)
    f = urllib.request.urlopen(req)

    # reading the contents of the website
    return f.read()

# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error
ssl._create_default_https_context = ssl._create_unverified_context

webpage = url_get_contents('https://www.stcc.edu/about-stcc/employee-directory/')
soup = BeautifulSoup(webpage, 'html.parser')
tables = pd.read_html(StringIO(str(soup)))

# Extract the first table from the list
table = tables[0]
print(table)

# Print all tables from the list
for i, table in enumerate(tables):
    print(f"Table {i + 1}:\n{table}\n")
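webscrapeTest02Z.py points the same code at a live page, where the number and shape of the tables is outside our control, so the tables[0] access is worth guarding. A defensive sketch (hypothetical, not part of the commit):

import pandas as pd

try:
    tables = pd.read_html('https://www.stcc.edu/about-stcc/employee-directory/')
except ValueError:
    # read_html raises ValueError when no <table> is found
    tables = []

if tables:
    print(f"Found {len(tables)} table(s); first has shape {tables[0].shape}")
else:
    print("No tables found on the page")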
bigos/lecture20250429/Links/Links.txt (new file, 4 lines)
@@ -0,0 +1,4 @@
DataFrame vs Series in Pandas
https://www.geeksforgeeks.org/dataframe-vs-series-in-pandas/