  Mudit Desai

    Mudit Desai

    Apr 30, 2019
    import pandas as pd
    import numpy as np
    import matplotlib.pylab as plt
    import seaborn as sns
    import scipy.stats as stats
    import statsmodels
    import statsmodels.api as sm
    import statsmodels.formula.api as smf
    from statsmodels.formula.api import ols
    from sklearn.linear_model import LinearRegression

    df= pd.read_csv('311_Service_Requests_from_2010_to_Present.csv')
    df['Created Date'] = pd.to_datetime(df['Created Date'])
    df['Closed Date'] = pd.to_datetime(df['Closed Date'])
    df['Request_Closing_Time'] = (df['Closed Date'] - df['Created Date']).dt.days
    df = df[pd.notnull(df['Closed Date'])]
    df['Complaint Type1'] = pd.Categorical(df['Complaint Type'])

    print(stats.normaltest(df['Request_Closing_Time'], axis=0))
    NormaltestResult(statistic=821999.7644654972, pvalue=0.0)

    model = ols('Request_Closing_Time~Complaint Type1',data=df).fit()

    File "<unknown>", line 1
    Complaint Type1
    SyntaxError: invalid syntax

    df = pd.DataFrame({'Complaint Type1':X, 'Request_Closing_Time':Y})
    model = smf.ols('Y~X', data=df).fit()

    1df = pd.DataFrame({'Complaint Type1':X, 'Request_Closing_Time':Y})
    2 model = smf.ols('Y~X', data=df).fit()

    NameError: name 'X' is not defined

    table = sm.stats.anova_lm(model, typ=2)

    > 1table = sm.stats.anova_lm(model, typ=2)
    2 print(table)

    NameError: name 'model' is not defined

    Can you help me to find the error in creating the model?

