Source code for text_extensions_for_pandas.jupyter.widget.core

#
#  Copyright (c) 2021 IBM Corp.
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

#
# core.py
#
# Part of text_extensions_for_pandas
#
# Contains the base elements of the dataframe/spanarray widget
#

import pandas as pd
from . import span as tep_span
from . import table as tep_table
import text_extensions_for_pandas.resources

from text_extensions_for_pandas.jupyter.widget.stubs import (
    ipw, display, clear_output, HTML)

# TODO: This try/except block is for Python 3.6 support, and should be
# reduced to just importing importlib.resources when 3.6 support is dropped.
try:
    import importlib.resources as pkg_resources
except ImportError:
    import importlib_resources as pkg_resources

# Static front-end assets, read as text from the
# ``text_extensions_for_pandas.resources`` package once, when this module is
# imported. Each string is later embedded verbatim into an HTML tag by the
# widget code in this file.

# JavaScript injected inside a <script> tag by DataFrameWidget.__init__.
_WIDGET_SCRIPT: str = pkg_resources.read_text(
    text_extensions_for_pandas.resources, "dataframe_widget.js"
)
# CSS injected inside a <style> tag on every DataFrameWidget._update().
_WIDGET_STYLE: str = pkg_resources.read_text(
    text_extensions_for_pandas.resources, "dataframe_widget.css"
)
# JavaScript injected inside a <script> tag on every DataFrameWidget._update().
_WIDGET_TABLE_CONVERT_SCRIPT: str = pkg_resources.read_text(
    text_extensions_for_pandas.resources, "dataframe_widget_table_converter.js"
)


class DataFrameWidget:
    """Interactive Jupyter widget pairing a DataFrame table with a span view."""

    def __init__(
        self,
        dataframe: pd.DataFrame,
        metadata_column: pd.Series = None,
        interactive_columns: list = None,
    ):
        """An instance of an interactive widget that will display Text Extensions
        for Pandas types Span and TokenSpan in their document contexts beside a
        visualization of the backing dataframe. Provides interactive table
        elements, multiple Span coloring modes, and tools to analyze, modify,
        and extend DataFrame-backed datasets.

        :param dataframe: The DataFrame to visualize in the widget
        :type dataframe: pandas.DataFrame
        :param metadata_column: Series of selected values to pre-load into the
            index column, defaults to None. Values shorter than the DataFrame
            are padded with ``False``; longer values are truncated.
        :type metadata_column: pandas.Series, optional
        :param interactive_columns: List of column names to pre-set as
            interactive, defaults to None
        :type interactive_columns: list, optional
        :raises NotImplementedError: If ``dataframe`` has a ``pandas.MultiIndex``
            index.
        """
        if isinstance(dataframe.index, pd.MultiIndex):
            raise NotImplementedError(
                "There is currently no support for the pandas MultiIndex type. "
                "Use pandas DataFrame instead."
            )

        # Work on a private deep copy so edits made through the widget never
        # touch the caller's DataFrame.
        self._df = dataframe.copy(deep=True)

        # Refreshable Outputs
        self._widget_output = ipw.Output()
        self._debug_output = ipw.Output()
        self._widget_output.add_class("tep--dfwidget--output")
        # Assigned by DataFrameWidgetComponent when a document view is rendered.
        self._document_output = None

        # Span Visualization Globals
        self._tag_display = None
        self._color_mode = "ROW"

        # Initialize selected column
        self._metadata_column = self._build_selection(metadata_column)

        # Initialize interactive columns. ``or []`` keeps the original
        # behaviour of ignoring any falsy argument.
        self.interactive_columns = self._build_interactive_columns(
            interactive_columns or []
        )

        # Propagate initial values to components.
        self._update()

        # Attach the widget's script.
        with self._widget_output:
            display(HTML(f"<script>{_WIDGET_SCRIPT}</script>"))

    @property
    def selected(self) -> pd.Series:
        """A boolean series of the values of the selected rows in the table
        visualization."""
        return self._metadata_column

    def display(self) -> ipw.Widget:
        """Displays the widget. Returns a reference to the root output widget."""
        return self._widget_output

    def to_dataframe(self) -> pd.DataFrame:
        """Returns a copy of the DataFrame backing the internal state of the
        widget data.

        :return: A copy of the backing dataframe.
        :rtype: pandas.DataFrame
        """
        return self._df.copy(deep=True)

    def set_interactive_columns(self, columns: list):
        """Sets the columns to appear as interactive within the displayed widget.

        Every column is first reset to non-interactive; the widget is then
        re-rendered.

        :param columns: A list of column names to appear as interactive.
            ``None`` is treated as an empty list.
        :type columns: list
        """
        self.interactive_columns = self._build_interactive_columns(
            columns if columns is not None else []
        )
        self._update()

    # Internal helpers used to build widget state

    def _build_selection(self, metadata_column) -> pd.Series:
        """Return the per-row selection Series, indexed like the DataFrame.

        :param metadata_column: Initial selection values, or None for an
            all-``False`` selection.
        :return: A Series with one entry per DataFrame row.
        :rtype: pandas.Series
        """
        row_count = self._df.shape[0]
        if metadata_column is None:
            return pd.Series([False] * row_count, index=self._df.index)

        md_length = len(metadata_column)
        if md_length < row_count:
            # Pad positionally. ``list(...)`` is required: ``Series + list`` is
            # element-wise arithmetic and raises on mismatched lengths rather
            # than concatenating.
            metadata_column = list(metadata_column) + [False] * (
                row_count - md_length
            )
        elif md_length > row_count:
            metadata_column = metadata_column[:row_count]

        # NOTE(review): when a Series reaches this point, the pandas constructor
        # aligns it to the DataFrame index by label, not by position.
        return pd.Series(metadata_column, index=self._df.index)

    def _build_interactive_columns(self, columns) -> dict:
        """Return a ``{column name: bool}`` map with ``columns`` flagged True.

        :param columns: Iterable of column names to mark as interactive.
        :return: Flags for every DataFrame column, plus any extra names given.
        :rtype: dict
        """
        flags = {column: False for column in self._df.columns.values}
        for column in columns:
            flags[column] = True
        return flags

    # Internal methods to update or refresh widget state

    def _update(self):
        """Refresh the entire widget from scratch."""
        with self._widget_output:
            clear_output(wait=True)
            with self._debug_output:
                clear_output()
            display(self._debug_output)
            display(HTML(f"<script>{_WIDGET_TABLE_CONVERT_SCRIPT}</script>"))
            display(HTML(f"<style>{_WIDGET_STYLE}</style>"))
            display(ipw.VBox([DataFrameWidgetComponent(widget=self)]))

    def _update_document(self):
        """Only refresh the document display below the table."""
        # Nothing to refresh until a document output has been attached.
        if self._document_output:
            with self._document_output:
                clear_output(wait=True)
                display(tep_span.DataFrameDocumentContainerComponent(self))

    def _update_tag(self, change):
        """Updates the tag displayed on spans in the document view. Observe
        callback.

        :param change: Change mapping; its ``"new"`` entry is the new tag.
        """
        self._tag_display = change["new"]
        self._update_document()

    def _update_color_mode(self, change):
        """Updates the color mode of span rendering. Observe callback.

        :param change: Change mapping; its ``"new"`` entry is the new mode.
        """
        self._color_mode = change["new"]
        self._update_document()

    def _update_dataframe(self, value, column_name: str, column_index: int):
        """Updates the value at the indicated position in the dataframe.

        :param value: The value to insert into the DataFrame.
        :type value: any
        :param column_name: The name of the column to write to.
        :type column_name: str
        :param column_index: The row key within that column to write the value
            to. It is passed to ``DataFrame.at``, which addresses rows by index
            label. NOTE(review): label and integer position only coincide for a
            default integer index -- confirm what callers pass.
        :type column_index: int
        """
        self._df.at[column_index, column_name] = value
def DataFrameWidgetComponent(widget: DataFrameWidget) -> ipw.Widget:
    """Assemble the root node of the dataframe widget.

    The root is a vertical box holding the table component and, when the
    document container component yields something truthy, an output area
    showing it. That output area is also stored on ``widget._document_output``.

    :param widget: The widget whose state is rendered.
    :type widget: DataFrameWidget
    :return: The root container holding every created component.
    :rtype: ipywidgets.Widget
    """
    # The table is always present and always comes first.
    children = [tep_table.DataFrameTableComponent(widget=widget)]

    # The document view is optional; the original comment notes the component
    # comes back as None when there are no spans to render.
    documents = tep_span.DataFrameDocumentContainerComponent(widget=widget)
    if documents:
        doc_area = ipw.Output()
        doc_area.add_class("tep--dfwidget--document-output")
        # Keep a handle on the widget so the document view can be refreshed
        # without rebuilding the table.
        widget._document_output = doc_area
        children.append(doc_area)
        with doc_area:
            display(documents)

    # Wrap everything in a single styled root container.
    root = ipw.VBox(children=children)
    root.add_class("tep--dfwidget--root-container")
    return root