"""the file, which mainly manages the data extraction process"""
import pandas as pd
from enum import Enum
from loguru import logger
from src.core.meta import GameTable, TimeTable, ImportType
from src.core.process.pipelines.client import ClientKeys, tableTypeForClient, needsAggClient, needsMetaDataClient
from src.core.process.pipelines.matchv5 import MatchV5Keys, tableTypeForMatchV5, needsAggMatchV5, needsMetaDataMatchV5
from src.utils.pandas import dropListEntries, mergeTables, indexByOneVariable, concatOnPrefixes
[docs]
class ImportPipeline(Enum):
    """Import pipelines that are currently in use.

    The value of each member is the keys object imported from the module of
    the corresponding pipeline.

    Attributes
    ----------
    CLIENT : ClientKeys
        the pipeline for the client dumped data
    MATCHV5 : MatchV5Keys
        the pipeline for the matchv5 gathered data
    """
    CLIENT = ClientKeys
    MATCHV5 = MatchV5Keys
# Lookup table: one entry per ImportPipeline member, pointing at the
# ``needsAgg*`` dict imported from that pipeline's module.
# NOTE(review): the inner dicts presumably tell, per key, whether the table
# has to be aggregated -- confirm against the pipeline modules.
pipeToAgg: dict[ImportPipeline, dict] = {
    ImportPipeline.CLIENT: needsAggClient,
    ImportPipeline.MATCHV5: needsAggMatchV5
}
"""dict, which maps the pipeline to the aggregation helper"""
# Lookup table: one entry per ImportPipeline member, pointing at the
# ``needsMetaData*`` dict imported from that pipeline's module.
# NOTE(review): the inner dicts presumably hold, per key, the meta path that
# has to be passed along when parsing -- confirm against the pipeline modules.
pipeToMeta: dict[ImportPipeline, dict] = {
    ImportPipeline.CLIENT: needsMetaDataClient,
    ImportPipeline.MATCHV5: needsMetaDataMatchV5
}
"""dict, which maps the pipeline to the metapath helper"""
[docs]
def classify(member: ClientKeys | MatchV5Keys) -> GameTable | TimeTable:
    """Look up the table type a pipeline key belongs to.

    Parameters
    ----------
    member : ClientKeys | MatchV5Keys
        a member of one of the two key enums

    Returns
    -------
    GameTable | TimeTable
        the table registered for ``member`` in ``tableTypeForClient`` (for
        ``ClientKeys`` members) or in ``tableTypeForMatchV5`` (for everything
        else)
    """
    logger.trace(f"classified the member {member.value}.")
    # Only ClientKeys members go through the client mapping; any other key is
    # treated as a matchv5 key.
    tableLookup = (
        tableTypeForClient
        if isinstance(member, ClientKeys)
        else tableTypeForMatchV5
    )
    return tableLookup[member]
[docs]
def getData(
data: dict,
pathKey: MatchV5Keys | ClientKeys,
typ: ImportType,
metaKey: list[list[str]] | None = None,
agg: bool = False,
) -> pd.DataFrame:
"""parses the data according the passed pathKey
Parameters
----------
data : dict
the raw data, extracted from a .json gamefile
pathKey : MatchV5Keys | ClientKeys
the path to the data in the json tree of the gamefile
metaKey : list[list[str]] | None, optional
the path to a meta variable. this is only used, if the jsontree contains deeper lists,
that we have to parse in an extra step.
agg : bool, optional
if passed, the tables gets aggregated with indexbyoneVariable
Returns
-------
dataframe : pd.DataFrame
the resulting table
"""
logger.trace("Start normalizing the raw data dict via pandas.")
normalizedDf = pd.json_normalize(
data,
record_path=pathKey.value,
meta=metaKey,
sep="_"
)
listlessDf = dropListEntries(normalizedDf)
if agg:
logger.debug("Start aggregating the resulting datatable.")
match typ:
case ImportType.GENERAL:
return indexByOneVariable(listlessDf, "_".join(metaKey[0]))
case ImportType.TIMELINE:
return concatOnPrefixes(listlessDf, [
"participantFrames_1_",
"participantFrames_2_",
"participantFrames_3_",
"participantFrames_4_",
"participantFrames_5_",
"participantFrames_6_",
"participantFrames_7_",
"participantFrames_8_",
"participantFrames_9_",
"participantFrames_10_",
], "participantid")
return listlessDf