import oracledb import getpass import pandas.io.sql as pdsql # default values for the host and database name default_host = 'dbs3.cs.umb.edu:1521' default_database = 'dbs3' # READ USERNAME, PASSWORD, DB HOST, DB NAME from command line hostname = input(f'Enter the hostname ({default_host}): ') # if nothing is entered, use the default host - ignore whitespaces if len(hostname.strip()) == 0: hostname = default_host database = input(f'Enter the database ({default_database}): ') # if nothing is entered, use the default database - ignore whitespaces if len(database.strip()) == 0: database = default_database username = input("Enter your Oracle username: ") # getpass hides the user's input when typed password = getpass.getpass("Enter your Oracle password: ") # dsn is in the form of /. # Construct the dsn, ensuring the right format. dsn = hostname + database if hostname[-1] == '/' else hostname + '/' + database print(f'Trying to connect to Oracle Server \"{dsn}\"') # Establish a connection to DBMS connection = oracledb.connect(user=username, password=password, dsn=dsn) print('Acquired a connection to the DB ...') # run queries against DB using the DB connection # Let's first get a list of tables that exists in the database schema query_select_table_names = 'SELECT table_name FROM user_tables' df = pdsql.read_sql(query_select_table_names , con=connection) print('\n==========================================================================\n') print(df) # Read all students into a Pandas Dataframe student_query = "SELECT * FROM Students" students_df = pdsql.read_sql(student_query, con=connection) # Print column names print('\n==========================================================================\n') print("Column Names:") print(students_df.columns) # Print the shape of the Dataframe print('\n==========================================================================\n') print(f"Shape of the Dataframe: {students_df.shape}") # Print the first two records print('\n==========================================================================\n') print("First two records:") print(students_df.head(2)) # Print the average age print('\n==========================================================================\n') print("Average age of Students:") print(students_df[['AGE']].agg(['mean'])) # Print the min and max of the GPA's print('\n==========================================================================\n') print("Min and max of Students' GPA:") print(students_df[['GPA']].agg(['min', 'max'])) # Query joined data joined_query = """ SELECT s.sid, s.name, s.state, a.aid, a.title FROM Students s JOIN Reads r ON s.sid = r.sid JOIN Articles a ON r.aid = a.aid """ joined_df = pdsql.read_sql(joined_query, con=connection) # Print the joined dataframe print('\n==========================================================================\n') print("Joined (Students, Reads, Articles) Dataframe:") print(joined_df) # Shape of the joined dataframe print('\n==========================================================================\n') print(f"Shape of the joined dataframe: {joined_df.shape}") # Filter for MA Students print('\n==========================================================================\n') print("Filter for MA Students:") MA_students_df = joined_df[joined_df['STATE'] == 'MA'] print(MA_students_df) # Group by sid and count the articles read by each student from MA print('\n==========================================================================\n') print("Number of articles read by MA students:") MA_article_count = MA_students_df.groupby('SID').size() print(MA_article_count) print('\n==========================================================================\n') # Close the connection connection.close() print("\nConnection Closed.")