Streudiagramme Teil 6

Im Streudiagramm lässt sich am Muster der Punkte die Abhängigkeitsstruktur der beiden Merkmale erkennen.

Stand: 27.04.2021

import matplotlib.pyplot as plt
import numpy as np
from pandas.plotting import scatter_matrix
# NOTE: earlier experiments, not executed here: log-transforming the
# 'Temperatur' column with np.log, dropping NaN rows, and drawing a
# log-log scatter via df.plot.scatter(..., loglog=True).

# Pairwise scatter-plot matrix over all columns of df_wasserkuppedrop.
scatter_matrix(frame=df_wasserkuppedrop, figsize=(15, 7))

array([[<AxesSubplot:xlabel='Temperatur', ylabel='Temperatur'>,
        <AxesSubplot:xlabel='Luftdruck', ylabel='Temperatur'>,
        <AxesSubplot:xlabel='Kohlendioxid', ylabel='Temperatur'>,
        <AxesSubplot:xlabel='ppm', ylabel='Temperatur'>],
       [<AxesSubplot:xlabel='Temperatur', ylabel='Luftdruck'>,
        <AxesSubplot:xlabel='Luftdruck', ylabel='Luftdruck'>,
        <AxesSubplot:xlabel='Kohlendioxid', ylabel='Luftdruck'>,
        <AxesSubplot:xlabel='ppm', ylabel='Luftdruck'>],
       [<AxesSubplot:xlabel='Temperatur', ylabel='Kohlendioxid'>,
        <AxesSubplot:xlabel='Luftdruck', ylabel='Kohlendioxid'>,
        <AxesSubplot:xlabel='Kohlendioxid', ylabel='Kohlendioxid'>,
        <AxesSubplot:xlabel='ppm', ylabel='Kohlendioxid'>],
       [<AxesSubplot:xlabel='Temperatur', ylabel='ppm'>,
        <AxesSubplot:xlabel='Luftdruck', ylabel='ppm'>,
        <AxesSubplot:xlabel='Kohlendioxid', ylabel='ppm'>,
        <AxesSubplot:xlabel='ppm', ylabel='ppm'>]], dtype=object)


png

# Remove the helper column 'MA3' before plotting.
# errors='ignore' keeps this cell re-runnable: without it, a second run
# raises KeyError because 'MA3' has already been dropped.
dfco2m = dfco2m.drop(columns=['MA3'], errors='ignore')
dfco2m
| Datum | Temperatur | Luftdruck | Kohlendioxid | ppm |
|---|---|---|---|---|
| 2011-01-31 | 9.861905 | 1018.083333 | 718.535714 | 377.267934 |
| 2011-02-28 | 10.933333 | 1015.166667 | 718.979167 | 380.087965 |
| 2011-03-31 | 10.738542 | 1012.093750 | 723.416667 | 383.434028 |
| 2011-04-30 | 10.977660 | 1008.351064 | 724.648936 | 385.891135 |
| 2011-05-31 | 8.386458 | 1008.500000 | 723.479167 | 381.627605 |
| ... | ... | ... | ... | ... |
| 2020-08-31 | 5.670833 | 1020.233333 | 751.100000 | 387.711256 |
| 2020-09-30 | 7.174167 | 1014.941667 | 745.800000 | 389.088071 |
| 2020-10-31 | 5.693333 | 1009.225000 | 741.425000 | 386.948444 |
| 2020-11-30 | 2.715833 | 1012.291667 | 745.200000 | 383.644441 |
| 2020-12-31 | 2.384167 | 1013.800000 | 746.466667 | 383.237839 |

120 rows × 4 columns

import matplotlib.pyplot as plt
import numpy as np
from pandas.plotting import scatter_matrix
# NOTE: earlier experiments, not executed here: log-transforming the
# 'Temperatur' column with np.log, dropping NaN rows, and drawing a
# log-log scatter via df.plot.scatter(..., loglog=True).

# Pairwise scatter-plot matrix over all columns of dfco2m.
scatter_matrix(frame=dfco2m, figsize=(15, 7))

array([[<AxesSubplot:xlabel='Temperatur', ylabel='Temperatur'>,
        <AxesSubplot:xlabel='Luftdruck', ylabel='Temperatur'>,
        <AxesSubplot:xlabel='Kohlendioxid', ylabel='Temperatur'>,
        <AxesSubplot:xlabel='ppm', ylabel='Temperatur'>],
       [<AxesSubplot:xlabel='Temperatur', ylabel='Luftdruck'>,
        <AxesSubplot:xlabel='Luftdruck', ylabel='Luftdruck'>,
        <AxesSubplot:xlabel='Kohlendioxid', ylabel='Luftdruck'>,
        <AxesSubplot:xlabel='ppm', ylabel='Luftdruck'>],
       [<AxesSubplot:xlabel='Temperatur', ylabel='Kohlendioxid'>,
        <AxesSubplot:xlabel='Luftdruck', ylabel='Kohlendioxid'>,
        <AxesSubplot:xlabel='Kohlendioxid', ylabel='Kohlendioxid'>,
        <AxesSubplot:xlabel='ppm', ylabel='Kohlendioxid'>],
       [<AxesSubplot:xlabel='Temperatur', ylabel='ppm'>,
        <AxesSubplot:xlabel='Luftdruck', ylabel='ppm'>,
        <AxesSubplot:xlabel='Kohlendioxid', ylabel='ppm'>,
        <AxesSubplot:xlabel='ppm', ylabel='ppm'>]], dtype=object)


png

# 2D scatter: 'Luftdruck' (y) against 'Temperatur' (x), linear axes,
# fully opaque markers.
dfco2m.plot(
    kind='scatter',
    x='Temperatur',
    y='Luftdruck',
    loglog=False,
    alpha=1,
    figsize=(15, 7),
)
plt.show()


png

# 2D scatter: 'ppm' (y) against 'Temperatur' (x), linear axes,
# fully opaque markers.
dfco2m.plot(
    kind='scatter',
    x='Temperatur',
    y='ppm',
    loglog=False,
    alpha=1,
    figsize=(15, 7),
)
plt.show()


png

# 2D scatter: 'ppm' (y) against 'Luftdruck' (x), linear axes,
# fully opaque markers.
dfco2m.plot(
    kind='scatter',
    x='Luftdruck',
    y='ppm',
    loglog=False,
    alpha=1,
    figsize=(15, 7),
)
plt.show()


png

3D-Streudiagramm

#https://stackoverflow.com/questions/59232073/scatter-plot-with-3-variables-in-matplotlib

#https://www.advsofteng.com/doc/cdpydoc/threedscatter2.htm Dropline



import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np

# Extract the three columns as NumPy arrays; x, y and z are kept as
# module-level names because later cells inspect them.
x = np.array(dfco2m['Temperatur'])
y = np.array(dfco2m['Luftdruck'])
z = np.array(dfco2m['ppm'])

# Single 3D axes on a large square figure.
fig = plt.figure(figsize=(20, 20))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x, y, z,
           linewidths=1, alpha=.7,
           edgecolor='k',
           s=200,
           c='green',
           )
# Label every axis with its source column; without labels the three
# dimensions of the 3D plot cannot be told apart.
ax.set_xlabel('Temperatur')
ax.set_ylabel('Luftdruck')
ax.set_zlabel('ppm')
plt.show()


png

# Print a summary of dfco2m (index, columns, non-null counts, dtypes).
dfco2m.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 120 entries, 2011-01-31 to 2020-12-31
Freq: M
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Temperatur    120 non-null    float64
 1   Luftdruck     120 non-null    float64
 2   Kohlendioxid  120 non-null    float64
 3   ppm           120 non-null    float64
dtypes: float64(4)
memory usage: 4.7 KB
# Show the shape of the x array used for the 3D scatter.
x.shape
(120,)
# Show the shape of the y array used for the 3D scatter.
y.shape
(120,)
# Show the shape of the z array used for the 3D scatter.
z.shape
(120,)

TODO: Droplines ergänzen – siehe https://matplotlib.org/devdocs/gallery/mplot3d/stem3d_demo.html
Siehe: https://support.minitab.com/de-de/minitab/19/help-and-how-to/graphs/3d-scatterplot/interpret-the-results/key-results/