diff --git a/bigos/lecture20250429/1CreateBasicSeries/hack.py b/bigos/lecture20250429/1CreateBasicSeries/hack.py new file mode 100644 index 0000000..7c53cc1 --- /dev/null +++ b/bigos/lecture20250429/1CreateBasicSeries/hack.py @@ -0,0 +1,9 @@ +import pandas as pd +s1 = pd.Series([1.25,1.75,2.25,2.75,3.25],index=['a','b','c','d','e']) +print(s1) +s2 = pd.Series([6,11.2,4,2,1.1],index=['a','b','c','d','e']) +print(s2) +s3 = s1+s2 +print(s3) +s4 = s1*s2 +print(s4) \ No newline at end of file diff --git a/bigos/lecture20250429/1CreateBasicSeries/series01.py b/bigos/lecture20250429/1CreateBasicSeries/series01.py new file mode 100644 index 0000000..ca972c0 --- /dev/null +++ b/bigos/lecture20250429/1CreateBasicSeries/series01.py @@ -0,0 +1,3 @@ +import pandas as pd +s1 = pd.Series([1.25,1.75,2.25,2.75,3.25]) +print(s1) diff --git a/bigos/lecture20250429/1CreateBasicSeries/series02.py b/bigos/lecture20250429/1CreateBasicSeries/series02.py new file mode 100644 index 0000000..bd0e86a --- /dev/null +++ b/bigos/lecture20250429/1CreateBasicSeries/series02.py @@ -0,0 +1,3 @@ +import pandas as pd +s1 = pd.Series([1.25,1.75,2.25,2.75,3.25],index=['a','b','c','d','e']) +print(s1) diff --git a/bigos/lecture20250429/1CreateBasicSeries/series03.py b/bigos/lecture20250429/1CreateBasicSeries/series03.py new file mode 100644 index 0000000..cf836d5 --- /dev/null +++ b/bigos/lecture20250429/1CreateBasicSeries/series03.py @@ -0,0 +1,5 @@ +import pandas as pd +s1 = pd.Series([1.25,1.75,2.25,2.75,3.25],index=['a','b','c','d','e']) +print(s1) +s3 = s1+5 +print(s3) \ No newline at end of file diff --git a/bigos/lecture20250429/1CreateBasicSeries/series04.py b/bigos/lecture20250429/1CreateBasicSeries/series04.py new file mode 100644 index 0000000..a46b132 --- /dev/null +++ b/bigos/lecture20250429/1CreateBasicSeries/series04.py @@ -0,0 +1,5 @@ +import pandas as pd +s1 = pd.Series([1.25,1.75,2.25,2.75,3.25]) +print(s1) +s2 = s1[s1>2] +print(s2) \ No newline at end of file diff --git 
a/bigos/lecture20250429/1CreateBasicSeries/seriesandnumpy01.py b/bigos/lecture20250429/1CreateBasicSeries/seriesandnumpy01.py new file mode 100644 index 0000000..562994a --- /dev/null +++ b/bigos/lecture20250429/1CreateBasicSeries/seriesandnumpy01.py @@ -0,0 +1,9 @@ +import pandas as pd +import numpy as np +arr = np.array([10,20,30,40,50]) +s = pd.Series(arr) +print(arr) +print(s) +t = np.sqrt(s) +print(t) +print(type(t)) \ No newline at end of file diff --git a/bigos/lecture20250429/2CreateBasicDataframe/CreateDataFrame01.py b/bigos/lecture20250429/2CreateBasicDataframe/CreateDataFrame01.py new file mode 100644 index 0000000..0e36ae7 --- /dev/null +++ b/bigos/lecture20250429/2CreateBasicDataframe/CreateDataFrame01.py @@ -0,0 +1,5 @@ +import pandas as pd + +df = pd.DataFrame() +print("Create empty data frame") +print(df) diff --git a/bigos/lecture20250429/2CreateBasicDataframe/CreateDataFrame02.py b/bigos/lecture20250429/2CreateBasicDataframe/CreateDataFrame02.py new file mode 100644 index 0000000..b81c356 --- /dev/null +++ b/bigos/lecture20250429/2CreateBasicDataframe/CreateDataFrame02.py @@ -0,0 +1,5 @@ +import pandas as pd +s = pd.Series(['a','b','c','d']) +df = pd.DataFrame(s) +print("Create data frame from series") +print(df) diff --git a/bigos/lecture20250429/2CreateBasicDataframe/CreateDataFrame03.py b/bigos/lecture20250429/2CreateBasicDataframe/CreateDataFrame03.py new file mode 100644 index 0000000..b024ac3 --- /dev/null +++ b/bigos/lecture20250429/2CreateBasicDataframe/CreateDataFrame03.py @@ -0,0 +1,9 @@ +import pandas as pd +name = pd.Series(['Bob','Sam']) +team = pd.Series(['Wild Bunch','Sleepy Team']) +dic = {'Name':name,'Team':team} + +df = pd.DataFrame(dic) + +print("Create data frame from a dictionary") +print(df) diff --git a/bigos/lecture20250429/2CreateBasicDataframe/CreateDataFrame04.py b/bigos/lecture20250429/2CreateBasicDataframe/CreateDataFrame04.py new file mode 100644 index 0000000..ff9823f --- /dev/null +++ 
b/bigos/lecture20250429/2CreateBasicDataframe/CreateDataFrame04.py @@ -0,0 +1,23 @@ +import pandas as pd + +namesList = [{'FirstName':"Bob", 'LastName': "Smith"}, + {'FirstName':"Rusty", 'LastName': "Jones"}, + {'FirstName': "Tanner", 'LastName': "Golden"}, + {'FirstName': "Harry", 'LastName': "Chinook"} + ] +df = pd.DataFrame(namesList) +print(df) + +# Access data row wise using iterrows() +print(" ========== Row wise data =========") +for (row_index,row_value) in df.iterrows(): + print("\n Row index is :",row_index) + print("\n Row Value is: ",row_value) + +print("\n") +# Access data column wise using items() +print(" ========== Column wise data =========") +#for (col_index,col_value) in df.iteritems(): +for (col_index, col_value) in df.items() : + print("\n Col index is :",col_index) + print("\n Col Value is: ",col_value) diff --git a/bigos/lecture20250429/3PrintAndDescribe/CreateDataFrame01.py b/bigos/lecture20250429/3PrintAndDescribe/CreateDataFrame01.py new file mode 100644 index 0000000..0e36ae7 --- /dev/null +++ b/bigos/lecture20250429/3PrintAndDescribe/CreateDataFrame01.py @@ -0,0 +1,5 @@ +import pandas as pd + +df = pd.DataFrame() +print("Create empty data frame") +print(df) diff --git a/bigos/lecture20250429/3PrintAndDescribe/CreateDataFrame04.py b/bigos/lecture20250429/3PrintAndDescribe/CreateDataFrame04.py new file mode 100644 index 0000000..d580c98 --- /dev/null +++ b/bigos/lecture20250429/3PrintAndDescribe/CreateDataFrame04.py @@ -0,0 +1,23 @@ +import pandas as pd + +namesList = [{'FirstName':"Bob", 'LastName': "Smith"}, + {'FirstName':"Rusty", 'LastName': "Jones"}, + {'FirstName':"Tanner", 'LastName': "Golden"}, + {'FirstName':"Harry", 'LastName': "Chinook"} + ] +df = pd.DataFrame(namesList) +print(df) + +# Access data row wise using iterrows() +print(" ========== Row wise data =========") +for (row_index,row_value) in df.iterrows(): + print("\n Row index is :",row_index) + print("\n Row Value is: ",row_value) + +print("\n") +# Access data row wise 
using iterrows() +print(" ========== Column wise data =========") +#for (col_index,col_value) in df.iteritems(): +for (col_index, col_value) in df.items() : + print("\n Col index is :",col_index) + print("\n Col Value is: ",col_value) diff --git a/bigos/lecture20250429/3PrintAndDescribe/Describe01.py b/bigos/lecture20250429/3PrintAndDescribe/Describe01.py new file mode 100644 index 0000000..b8c74e9 --- /dev/null +++ b/bigos/lecture20250429/3PrintAndDescribe/Describe01.py @@ -0,0 +1,7 @@ +# Statistics Summary of a Series +import pandas as pd + +s1 = pd.Series([10,20,30,40,50]) +print("Original\n", s1) +print("Describe") +print(s1.describe()) diff --git a/bigos/lecture20250429/4WebscrapeLocal/index.html b/bigos/lecture20250429/4WebscrapeLocal/index.html new file mode 100644 index 0000000..e1f5cbc --- /dev/null +++ b/bigos/lecture20250429/4WebscrapeLocal/index.html @@ -0,0 +1,32 @@ + + + + + + Example HTML Table + + + +

Sample HTML Table

+ + + + + + + + + + + + + + + + + +
Header 1Header 2Header 3
Data 1-1Data 1-2Data 1-3
Data 2-1Data 2-2Data 2-3
+ + + + diff --git a/bigos/lecture20250429/4WebscrapeLocal/index2.html b/bigos/lecture20250429/4WebscrapeLocal/index2.html new file mode 100644 index 0000000..60b8ea5 --- /dev/null +++ b/bigos/lecture20250429/4WebscrapeLocal/index2.html @@ -0,0 +1,50 @@ + + + + + + Example HTML Table + + + +

Sample HTML Table

+ + + + + + + + + + + + + + + + + +
Header 1Header 2Header 3
Data 1-1Data 1-2Data 1-3
Data 2-1Data 2-2Data 2-3
+
+ + + + + + + + + + + + + + + + +
Header 4Header 5Header 6
Data 1-1Data 1-2Data 1-3
Data 2-1Data 2-2Data 2-3
+ + + + diff --git a/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01.py b/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01.py new file mode 100644 index 0000000..817e81b --- /dev/null +++ b/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01.py @@ -0,0 +1,17 @@ +# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/ + + + +import pandas as pd +from bs4 import BeautifulSoup + +# Read the HTML file into a Pandas dataframe +with open('index.html') as file: + soup = BeautifulSoup(file, 'html.parser') +tables = pd.read_html(str(soup)) + +# Extract the table from the dataframe +table = tables[0] +print(table) + + diff --git a/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01X.py b/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01X.py new file mode 100644 index 0000000..724373e --- /dev/null +++ b/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01X.py @@ -0,0 +1,44 @@ +# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/ + +import pandas as pd +from bs4 import BeautifulSoup + + +# Library for opening url and creating +# requests +import urllib.request +import ssl + +# pretty-print python data structures +from pprint import pprint + + +# Opens a website and read its +# binary contents (HTTP Response Body) +def url_get_contents(url): + + # Opens a website and read its + # binary contents (HTTP Response Body) + + #making request to the website + req = urllib.request.Request(url=url) + f = urllib.request.urlopen(req) + + #reading contents of the website + return f.read() + +# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error +ssl._create_default_https_context = ssl._create_unverified_context + +webpage = url_get_contents('https://cset2.stcc.edu/~csetuser/pandas/index.html') + +# Read the HTML file into a Pandas dataframe +soup = BeautifulSoup(webpage, 'html.parser') +tables = pd.read_html(str(soup)) + +# Extract the table from the dataframe +table 
= tables[0] +print(table) + + + diff --git a/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01Y.py b/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01Y.py new file mode 100644 index 0000000..ae87628 --- /dev/null +++ b/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest01Y.py @@ -0,0 +1,45 @@ +# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/ + +import pandas as pd +from bs4 import BeautifulSoup + + +# Library for opening url and creating +# requests +import urllib.request +import ssl + +# pretty-print python data structures +from pprint import pprint + +from io import StringIO + +# Opens a website and read its +# binary contents (HTTP Response Body) +def url_get_contents(url): + + # Opens a website and read its + # binary contents (HTTP Response Body) + + #making request to the website + req = urllib.request.Request(url=url) + f = urllib.request.urlopen(req) + + #reading contents of the website + return f.read() + +# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error +ssl._create_default_https_context = ssl._create_unverified_context + +webpage = url_get_contents('https://cset2.stcc.edu/~csetuser/pandas/index.html') + +# Read the HTML file into a Pandas dataframe +soup = BeautifulSoup(webpage, 'html.parser') +tables = pd.read_html(StringIO(str(soup))) + +# Extract the table from the dataframe +table = tables[0] +print(table) + + + diff --git a/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02.py b/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02.py new file mode 100644 index 0000000..891d22a --- /dev/null +++ b/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02.py @@ -0,0 +1,23 @@ +# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/ +# 
https://search.brave.com/search?q=Passing+literal+html+to+%27read_html%27+is+deprecated+and+will+be+removed+in+a+future+version.+To+read+from+a+literal+string%2C+wrap+it+in+a+%27StringIO%27+object.+tables+%3D+pd.read_html%28str%28soup%29%29&source=desktop&summary=1&conversation=212132c0d053324192dd99 +# https://search.brave.com/search?q=python3+StringIO&source=desktop&summary=1&conversation=784ebc2d4f47f7464a9ca6 + + + +import pandas as pd +from bs4 import BeautifulSoup +from io import StringIO + +# Read the HTML file into a Pandas dataframe +with open('index.html') as file: + soup = BeautifulSoup(file, 'html.parser') +tables = pd.read_html(StringIO(str(soup))) + +# Extract the table from the dataframe +table = tables[0] +print(table) + +# Extract all tables from the dataframe +for i, table in enumerate(tables): + print(f"Table {i + 1}:\n{table}\n") + diff --git a/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02Y.py b/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02Y.py new file mode 100644 index 0000000..545ae9a --- /dev/null +++ b/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02Y.py @@ -0,0 +1,42 @@ +# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/ +# https://search.brave.com/search?q=Passing+literal+html+to+%27read_html%27+is+deprecated+and+will+be+removed+in+a+future+version.+To+read+from+a+literal+string%2C+wrap+it+in+a+%27StringIO%27+object.+tables+%3D+pd.read_html%28str%28soup%29%29&source=desktop&summary=1&conversation=212132c0d053324192dd99 +# https://search.brave.com/search?q=python3+StringIO&source=desktop&summary=1&conversation=784ebc2d4f47f7464a9ca6 + + + +import pandas as pd +from bs4 import BeautifulSoup +from io import StringIO + +import urllib.request +import ssl + +# Opens a website and read its +# binary contents (HTTP Response Body) +def url_get_contents(url): + + # Opens a website and read its + # binary contents (HTTP Response Body) + + #making request to the website + req = 
urllib.request.Request(url=url) + f = urllib.request.urlopen(req) + + #reading contents of the website + return f.read() + +# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error +ssl._create_default_https_context = ssl._create_unverified_context + +webpage = url_get_contents('https://cset2.stcc.edu/~csetuser/pandas/index.html') +soup = BeautifulSoup(webpage, 'html.parser') +tables = pd.read_html(StringIO(str(soup))) + +# Extract the table from the dataframe +table = tables[0] +print(table) + +# Extract all tables from the dataframe +for i, table in enumerate(tables): + print(f"Table {i + 1}:\n{table}\n") + diff --git a/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02Z.py b/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02Z.py new file mode 100644 index 0000000..3453d63 --- /dev/null +++ b/bigos/lecture20250429/4WebscrapeLocal/webscrapeTest02Z.py @@ -0,0 +1,42 @@ +# https://saturncloud.io/blog/how-to-extract-tables-from-html-with-python-and-pandas/ +# https://search.brave.com/search?q=Passing+literal+html+to+%27read_html%27+is+deprecated+and+will+be+removed+in+a+future+version.+To+read+from+a+literal+string%2C+wrap+it+in+a+%27StringIO%27+object.+tables+%3D+pd.read_html%28str%28soup%29%29&source=desktop&summary=1&conversation=212132c0d053324192dd99 +# https://search.brave.com/search?q=python3+StringIO&source=desktop&summary=1&conversation=784ebc2d4f47f7464a9ca6 + + + +import pandas as pd +from bs4 import BeautifulSoup +from io import StringIO + +import urllib.request +import ssl + +# Opens a website and read its +# binary contents (HTTP Response Body) +def url_get_contents(url): + + # Opens a website and read its + # binary contents (HTTP Response Body) + + #making request to the website + req = urllib.request.Request(url=url) + f = urllib.request.urlopen(req) + + #reading contents of the website + return f.read() + +# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error 
+ssl._create_default_https_context = ssl._create_unverified_context + +webpage = url_get_contents('https://www.stcc.edu/about-stcc/employee-directory/') +soup = BeautifulSoup(webpage, 'html.parser') +tables = pd.read_html(StringIO(str(soup))) + +# Extract the table from the dataframe +table = tables[0] +print(table) + +# Extract all tables from the dataframe +for i, table in enumerate(tables): + print(f"Table {i + 1}:\n{table}\n") + diff --git a/bigos/lecture20250429/Links/Links.txt b/bigos/lecture20250429/Links/Links.txt new file mode 100644 index 0000000..715e8e1 --- /dev/null +++ b/bigos/lecture20250429/Links/Links.txt @@ -0,0 +1,4 @@ + +DataFrame vs Series in Pandas +https://www.geeksforgeeks.org/dataframe-vs-series-in-pandas/ +