Yesterday the following code was giving me the dataframe without any problem, but today it started giving me the following error:

http.client.RemoteDisconnected: Remote end closed connection without response

Is there any solution? I know the problem comes from the website, but I need a workaround that lets me keep using the same code.
import datetime
import pandas as pd
import sqlalchemy
from datetime import datetime
from datetime import date, timedelta

#d = datetime.today().strftime('%d-%m-%Y')
data_atual = datetime.now()
mes_atual = data_atual.month
ano_atual = data_atual.year

df = []
for j in range(2020, ano_atual + 1):
    for i in range(9, 13):
        frame = []
        df1 = pd.read_html('https://www.centrodeinformacao.ren.pt/userControls/GetExcel.aspx?T=REN_MENSAL&P='+str(j)+'&PP='+str(i)+'&PPP=26&PPPP=36&PPPPP=0&variation=PT', decimal=',', thousands=',')[0]
        df2 = pd.read_html('https://www.centrodeinformacao.ren.pt/userControls/GetExcel.aspx?T=REN_MENSAL&P='+str(j)+'&PP='+str(i)+'&PPP=26&PPPP=36&PPPPP=0&variation=PT', decimal=',', thousands=',')[2]
        frame.append(df1)
        frame.append(df2)
        print(frame)
        result = pd.concat(frame)
        print(result)
        engine = sqlalchemy.create_engine('mysql+pymysql://root:1234@localhost:3306/projeto')
        result.to_sql(
            name='renmensal'+str(i)+str(j),  # database table name
            con=engine,
            if_exists='replace',
            index=False
        )
Answer
The error http.client.RemoteDisconnected: Remote end closed connection without response typically indicates that the server closed the connection unexpectedly, often because of too many requests in a short period or server-side restrictions.
Steps to Address the Issue
1. Add Delays Between Requests

To avoid overwhelming the server, introduce a delay between each request, for example at the end of every loop iteration:

import time

time.sleep(2)  # 2-second delay between requests
2. Use a Session with requests

A persistent session may help reduce the chance of connection issues by reusing the same underlying connection:

import requests

session = requests.Session()
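Some servers also drop connections from clients that don't look like a browser. Here is a minimal sketch, assuming (this is a guess, not something documented for this site) that sending a browser-like User-Agent header on the session helps; adjust or drop the header as needed:

import requests

# Reuse one underlying connection across all requests
session = requests.Session()
# Assumption: the server may close connections from "bot-like" clients;
# a browser-style User-Agent sometimes avoids that.
session.headers.update({
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
})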
3. Handle Connection Errors Gracefully

Use a try/except block to catch failed requests and skip to the next month. Note that the page is fetched once and both tables are parsed from the same response, instead of downloading it twice:

import pandas as pd
import requests
from requests.exceptions import ConnectionError, HTTPError

for j in range(2020, ano_atual + 1):
    for i in range(9, 13):
        url = f'https://www.centrodeinformacao.ren.pt/userControls/GetExcel.aspx?T=REN_MENSAL&P={j}&PP={i}&PPP=26&PPPP=36&PPPPP=0&variation=PT'
        try:
            # Fetch the page once, parse both tables from the same response
            response = session.get(url, timeout=10)
            df1 = pd.read_html(response.content, decimal=',', thousands=',')[0]
            df2 = pd.read_html(response.content, decimal=',', thousands=',')[2]
        except (ConnectionError, HTTPError) as e:
            print(f"Error fetching data for year {j}, month {i}: {e}")
            continue
4. Respect the Server's Load

If the server enforces rate limits, check its documentation (if available) to make sure you comply with its request limits; a simple throttle is sketched below.
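A minimal sketch of such a throttle, assuming a fixed minimum interval between requests (the 5-second value is an arbitrary placeholder, not a documented limit of this site, and polite_get is a hypothetical helper name):

import time

MIN_INTERVAL = 5.0  # seconds between requests; an assumption, tune to the site's rules
_last_request = 0.0

def polite_get(url):
    """Wait until at least MIN_INTERVAL has passed since the last request, then fetch."""
    global _last_request
    wait = MIN_INTERVAL - (time.monotonic() - _last_request)
    if wait > 0:
        time.sleep(wait)
    _last_request = time.monotonic()
    return session.get(url, timeout=10)  # uses the session created in step 2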
5. Retry Logic for Resiliency

Add retry logic so a transient failure doesn't abort the whole run:

import time

def fetch_with_retries(url, retries=3, delay=5):
    for attempt in range(retries):
        try:
            response = session.get(url, timeout=10)
            response.raise_for_status()  # raise HTTPError for 4xx/5xx responses
            return response
        except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            time.sleep(delay)
    raise Exception(f"Failed to fetch data after {retries} attempts")

for j in range(2020, ano_atual + 1):
    for i in range(9, 13):
        try:
            url = f'https://www.centrodeinformacao.ren.pt/userControls/GetExcel.aspx?T=REN_MENSAL&P={j}&PP={i}&PPP=26&PPPP=36&PPPPP=0&variation=PT'
            response = fetch_with_retries(url)
            df1 = pd.read_html(response.content, decimal=',', thousands=',')[0]
            df2 = pd.read_html(response.content, decimal=',', thousands=',')[2]
        except Exception as e:
            print(f"Error fetching data for year {j}, month {i}: {e}")
            continue
6. Alternative: Cache Data Locally

If possible, download the data manually or implement a caching mechanism so the same data is not requested from the server again and again; a sketch follows.
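A minimal sketch of on-disk caching, assuming each month's page is saved under a local cache/ directory (the directory layout and the get_cached helper are my own invention for illustration):

import os

CACHE_DIR = 'cache'  # hypothetical local cache directory
os.makedirs(CACHE_DIR, exist_ok=True)

def get_cached(url, year, month):
    """Return the page body for (year, month), downloading it only on the first run."""
    path = os.path.join(CACHE_DIR, f'renmensal_{year}_{month}.html')
    if os.path.exists(path):
        with open(path, 'rb') as f:
            return f.read()
    content = session.get(url, timeout=10).content
    with open(path, 'wb') as f:
        # Note: cached months are never refreshed; delete the file to force a re-download
        f.write(content)
    return content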
Complete Revised Script
Here’s the fully revised code:
import pandas as pd
import sqlalchemy
import requests
import time
from datetime import datetime

data_atual = datetime.now()
mes_atual = data_atual.month
ano_atual = data_atual.year

session = requests.Session()  # reuse one connection across requests

# Create the engine once, outside the loop
engine = sqlalchemy.create_engine('mysql+pymysql://root:1234@localhost:3306/projeto')

def fetch_with_retries(url, retries=3, delay=5):
    """Fetch url, retrying up to `retries` times with `delay` seconds between attempts."""
    for attempt in range(retries):
        try:
            response = session.get(url, timeout=10)
            response.raise_for_status()  # raise HTTPError for 4xx/5xx responses
            return response
        except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            time.sleep(delay)
    raise Exception(f"Failed to fetch data after {retries} attempts")

for j in range(2020, ano_atual + 1):
    for i in range(9, 13):
        try:
            url = f'https://www.centrodeinformacao.ren.pt/userControls/GetExcel.aspx?T=REN_MENSAL&P={j}&PP={i}&PPP=26&PPPP=36&PPPPP=0&variation=PT'
            response = fetch_with_retries(url)
            # Fetch once, parse both tables from the same response
            df1 = pd.read_html(response.content, decimal=',', thousands=',')[0]
            df2 = pd.read_html(response.content, decimal=',', thousands=',')[2]
            result = pd.concat([df1, df2])
            result.to_sql(name=f'renmensal{i}{j}', con=engine, if_exists='replace', index=False)
        except Exception as e:
            print(f"Error processing data for year {j}, month {i}: {e}")
        time.sleep(2)  # short pause between months so the server isn't hammered
This script introduces retries, session management, delays, and robust error handling to reduce the likelihood of encountering RemoteDisconnected.