Green Open Access

Green Open Access#

Show code cell source Hide code cell source

# In diesem Notebook benötigte Funktionen
import plotly.graph_objects as go
import numpy as np
import pandas as pd

# Load the survey export (CSV) into a DataFrame. The file name is a relative
# path, so it is resolved against the current working directory.
data = pd.read_csv("OpenAccessUmfrage_DATA_2024-02-20_1219.csv")

# Maps the numeric codes stored in the 'personal_info_2_f161d3' column to the
# label shown for the corresponding department / institution.
# Codes are consecutive integers starting at 1, in the order listed below.
# Note that the department numbers inside the labels ("FB 09", "FB 10", ...)
# are NOT the same as the codes, so the position in this tuple is what counts.
PERSONAL_INFO_MAPPING = dict(enumerate(
    (
        "FB 01: Rechtswissenschaften",
        "FB 02: Wirtschaftswissenschaften",
        "FB 03: Gesellschaftswissenschaften und Philosophie",
        "FB 04: Psychologie",
        "FB 05: Evangelische Theologie",
        "FB 06: Geschichte und Kulturwissenschaften",
        "FB 09: Germanistik und Kunstwissenschaften",
        "FB 10: Fremdsprachliche Philologien",
        "FB 12: Mathematik und Informatik",
        "FB 13: Physik",
        "FB 15: Chemie ",
        "FB 16: Pharmazie",
        "FB 17: Biologie",
        "FB 19: Geographie",
        "FB 20: Medizin",
        "FB 21: Erziehungswissenschaften",
        "an einem der wissenschaftlichen Zentren",
        "an einer Serviceeinrichtung",
        "in einem Museum / einer Sammlung",
        "sonstige Einrichtung",
    ),
    start=1,
))


def create_plot(answerlist: list, data_frame: pd.DataFrame, x_row: str | list[str], y_row: str, title: str = ''):
    """
    Build a grouped bar chart of answer totals with a dropdown to pick the group shown.

    One bar trace is created for every distinct, non-null value found in ``data_frame[y_row]`` plus one
    aggregate trace named ``'Alle'`` holding the sum over all of those groups. Only ``'Alle'`` is visible
    initially; the dropdown makes exactly one trace visible at a time.

    :param answerlist: Passed unchanged as ``hovertext`` to every bar trace.
    :param data_frame: The DataFrame to aggregate.
    :param x_row: Either one column name (its distinct non-null values become the answers and matching rows
        are counted) or a list of column names (each column is one answer and its values are summed).
    :param y_row: The column whose values define the groups offered in the dropdown.
    :param title: Currently not applied; the figure title is set to an empty string.
    :return: The assembled plotly figure.
    """
    figure = go.Figure()
    group_names = data_frame[y_row].drop_duplicates().dropna().tolist()

    # Each table row is: [answer label, total per group ..., total over all groups]
    table_rows = []
    if isinstance(x_row, list):
        # One column per answer: add up that column inside every group.
        for column_name in x_row:
            per_group = [
                np.sum(data_frame.loc[data_frame[y_row] == group][column_name])
                for group in group_names
            ]
            table_rows.append([column_name, *per_group, np.sum(per_group)])
    else:
        # A single column holds the answer: count the rows per (answer, group) pair.
        for option in data_frame[x_row].drop_duplicates().dropna():
            matching = data_frame.loc[data_frame[x_row] == option]
            per_group = [
                matching.loc[data_frame[y_row] == group].shape[0]
                for group in group_names
            ]
            table_rows.append([option, *per_group, np.sum(per_group)])

    trace_names = group_names + ['Alle']
    totals = pd.DataFrame(table_rows, columns=['Answer'] + trace_names)
    totals = totals.sort_values('Answer')  # ascending by answer label

    # One bar trace per group; the bar colour follows the bar height.
    for trace_name in trace_names:
        bar = go.Bar(
            x=totals['Answer'],
            y=totals[trace_name],
            name=trace_name,
            hovertext=answerlist,
            hovertemplate="%{hovertext}, %{y}",
            visible=(trace_name == 'Alle'),
            marker={'color': totals[trace_name], 'cauto': True, 'colorscale': 'viridis'},
        )
        figure.add_trace(bar)

    # The title stays empty on purpose: the question text is rendered in the
    # surrounding Markdown instead of inside the figure.
    figure.update_layout(
        title="",
        xaxis_title='Antworten',
        yaxis_title='Anzahl',
        barmode='group',
    )

    # Dropdown: selecting an entry shows that trace and hides all others.
    trace_count = len(trace_names)
    buttons = [
        {
            'label': trace_name,
            'method': "update",
            'args': [{"visible": [position == selected for position in range(trace_count)]}],
        }
        for selected, trace_name in enumerate(trace_names)
    ]
    dropdown = {
        'active': trace_count - 1,  # 'Alle' is the last entry
        'buttons': buttons,
        'x': 0.25,  # horizontal position of the dropdown
        'xanchor': "right",
        'y': 1.5,  # vertical position of the dropdown
        'yanchor': "top",
    }
    figure.update_layout(updatemenus=[dropdown])
    figure.update_xaxes(tickangle=30)

    return figure


# Entry point used by the notebook cells: prepares the data and builds the figure with its dropdown menu
def show_plot(answerlist: list, df: pd.DataFrame, answer_tag: str | list[str], answer_mapping: dict, title: str,
              personal_info_field: str = 'personal_info_2_f161d3'):
    """
    Creates a new plot including a Dropdown select based on the personal-info field. Can be used for one column
    containing the answers or multiple columns. If the answer_tag parameter is provided as a list, a list of
    column names is expected containing the different answers in a multiple choice question. The column names will
    be translated using the answer_mapping dictionary. If the answer_tag parameter is only of type string, only one
    column is assumed and the answer_mapping dict will be used to translate the answer keys in the column.

    Neither ``df`` nor ``answer_mapping`` is modified; all work happens on copies.

    :param answerlist: Passed through to ``create_plot``, which uses it as hover text for the bars.
    :param df: The DataFrame containing the data to plot.
    :param answer_tag: The name of the column(s) containing the answers.
    :param answer_mapping: A mapping to _translate_ the columnnames/answerkeys for human presentation
    :param title: The title of the plot (forwarded to ``create_plot``).
    :param personal_info_field: The column containing the personal information used to create the dropdown.
    :return: The figure to display for example in jupyter notebook.
    :raises KeyError: If answer_tag is a list and one of its column names is not provided in the answer_mapping
        dict. (For a single column, answer keys missing from the mapping become NaN instead of raising.)
    """
    # Explicit copy: pd.DataFrame(df) alone may share data with the caller's
    # frame, so the column assignments below could leak into it.
    df = pd.DataFrame(df, copy=True)
    multiple_columns = isinstance(answer_tag, list)

    df[personal_info_field] = df[personal_info_field].map(PERSONAL_INFO_MAPPING)
    if not multiple_columns:
        df[answer_tag] = df[answer_tag].map(answer_mapping)

    # Order the rows by the position of their label in PERSONAL_INFO_MAPPING
    # (not alphabetically), so the groups appear in mapping order.
    df_sorted = df.sort_values(
        by=personal_info_field,
        key=lambda col: pd.Categorical(col, categories=list(PERSONAL_INFO_MAPPING.values()),
                                       ordered=True)
    )

    if multiple_columns:
        selected_columns = answer_tag + [personal_info_field]
    else:
        selected_columns = [answer_tag, personal_info_field]
    df_sorted = df_sorted[selected_columns]

    if multiple_columns:
        # Local rename table; the personal-info column keeps its name.
        # Built as a new dict so the caller's answer_mapping stays untouched.
        rename_map = {**answer_mapping, personal_info_field: personal_info_field}
        answer_tag = [rename_map[column] for column in answer_tag]
        df_sorted = df_sorted.rename(columns=rename_map)

    return create_plot(answerlist, df_sorted, answer_tag, personal_info_field, title)

Frage 1#

Single Choice

Haben Sie bereits einen Artikel im green Open Access veröffentlicht?

Frage 2#

Single Choice

Haben Sie bereits eine Monographie im green Open Access veröffentlicht?

Frage 3#

Multiple Choice

Wenn Sie schon einmal etwas zweitveröffentlicht haben: Wo?

Antwort A) auf dem Publikationsserver der UB Marburg
Antwort B) auf dem Publikationsserver/Repositorium einer anderen UB
Antwort C) auf einem fachspezifischen Repositorium
Antwort D) auf einem allgemeinen Repositorium
Antwort E) auf einem Preprint-Server
Antwort F) in wissenschaftlichen Netzwerken
Antwort G) auf der Homepage meiner Arbeitsgruppe/meines Projektes
Antwort H) auf meiner privaten Homepage
Antwort I) sonstiges

Frage 4#