Hello everyone,
I have tried several times to download the data following the instructions in "Accessing the data" (Python), and after some time (~2 hours) I get an error.
The data is downloaded but not cached locally. I have all the required access. Can someone help?
The error I get is the following:
```
ConnectionResetError Traceback (most recent call last)
~\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\response.py in _error_catcher(self)
435 try:
--> 436 yield
437
~\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\response.py in read(self, amt, decode_content, cache_content)
517 cache_content = False
--> 518 data = self._fp.read(amt) if not fp_closed else b""
519 if (
~\AppData\Local\Programs\Python\Python38\lib\http\client.py in read(self, amt)
457 b = bytearray(amt)
--> 458 n = self.readinto(b)
459 return memoryview(b)[:n].tobytes()
~\AppData\Local\Programs\Python\Python38\lib\http\client.py in readinto(self, b)
501 # (for example, reading in 1k chunks)
--> 502 n = self.fp.readinto(b)
503 if not n and b:
~\AppData\Local\Programs\Python\Python38\lib\socket.py in readinto(self, b)
668 try:
--> 669 return self._sock.recv_into(b)
670 except timeout:
~\AppData\Local\Programs\Python\Python38\lib\ssl.py in recv_into(self, buffer, nbytes, flags)
1240 self.__class__)
-> 1241 return self.read(nbytes, buffer)
1242 else:
~\AppData\Local\Programs\Python\Python38\lib\ssl.py in read(self, len, buffer)
1098 if buffer is not None:
-> 1099 return self._sslobj.read(len, buffer)
1100 else:
ConnectionResetError: [WinError 10054] Eine vorhandene Verbindung wurde vom Remotehost geschlossen
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
~\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\models.py in generate()
750 try:
--> 751 for chunk in self.raw.stream(chunk_size, decode_content=True):
752 yield chunk
~\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\response.py in stream(self, amt, decode_content)
574 while not is_fp_closed(self._fp):
--> 575 data = self.read(amt=amt, decode_content=decode_content)
576
~\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\response.py in read(self, amt, decode_content, cache_content)
539 # Content-Length are caught.
--> 540 raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
541
~\AppData\Local\Programs\Python\Python38\lib\contextlib.py in __exit__(self, type, value, traceback)
130 try:
--> 131 self.gen.throw(type, value, traceback)
132 except StopIteration as exc:
~\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\response.py in _error_catcher(self)
453 # This includes IncompleteRead.
--> 454 raise ProtocolError("Connection broken: %r" % e, e)
455
ProtocolError: ("Connection broken: ConnectionResetError(10054, 'Eine vorhandene Verbindung wurde vom Remotehost geschlossen', None, 10054, None)", ConnectionResetError(10054, 'Eine vorhandene Verbindung wurde vom Remotehost geschlossen', None, 10054, None))
During handling of the above exception, another exception occurred:
ChunkedEncodingError Traceback (most recent call last)
in
1 sensor_measurements_query = syn.tableQuery("select * from syn20681931")
----> 2 sensor_measurements_paths = syn.downloadTableColumns(sensor_measurements_query, "data_file_handle_id") # download data
3 sensor_measurements = sensor_measurements_query.asDataFrame() # pandas DataFrame
4 sensor_measurements['path'] = sensor_measurements.data_file_handle_id.astype(str).map(sensor_measurements_paths)
~\AppData\Local\Programs\Python\Python38\lib\site-packages\synapseclient\client.py in downloadTableColumns(self, table, columns, downloadLocation, **kwargs)
3509 zipfilepath = os.path.join(temp_dir, "table_file_download.zip")
3510 try:
-> 3511 zipfilepath = self._downloadFileHandle(response['resultZipFileHandleId'], table.tableId, 'TableEntity',
3512 zipfilepath)
3513 # TODO handle case when no zip file is returned
~\AppData\Local\Programs\Python\Python38\lib\site-packages\synapseclient\client.py in _downloadFileHandle(self, fileHandleId, objectId, objectType, destination, retries)
1786 # multiple downloading threads. otherwise it's more efficient to run the download as a simple
1787 # single threaded URL download.
-> 1788 downloaded_path = self._download_from_url_multi_threaded(fileHandleId,
1789 objectId,
1790 objectType,
~\AppData\Local\Programs\Python\Python38\lib\site-packages\synapseclient\client.py in _download_from_url_multi_threaded(self, file_handle_id, object_id, object_type, destination, expected_md5)
1831 path=temp_destination)
1832
-> 1833 multithread_download.download_file(self, request)
1834
1835 if expected_md5: # if md5 not set (should be the case for all except http download)
~\AppData\Local\Programs\Python\Python38\lib\site-packages\synapseclient\core\multithread_download\download_threads.py in download_file(client, download_request, max_concurrent_parts)
230 try:
231 downloader = _MultithreadedDownloader(client, executor, max_concurrent_parts)
--> 232 downloader.download_file(download_request)
233 finally:
234 # if we created the Executor for the purposes of processing this download we also
~\AppData\Local\Programs\Python\Python38\lib\site-packages\synapseclient\core\multithread_download\download_threads.py in download_file(self, request)
295 )
296
--> 297 self._write_chunks(request, completed_futures, transfer_status)
298
299 # once there is nothing else pending we are done with the file download
~\AppData\Local\Programs\Python\Python38\lib\site-packages\synapseclient\core\multithread_download\download_threads.py in _write_chunks(request, completed_futures, transfer_status)
370 for chunk_future in completed_futures:
371 start, chunk_response = chunk_future.result()
--> 372 chunk_data = chunk_response.content
373 file_write.seek(start)
374 file_write.write(chunk_response.content)
~\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\models.py in content(self)
827 self._content = None
828 else:
--> 829 self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''
830
831 self._content_consumed = True
~\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\models.py in generate()
752 yield chunk
753 except ProtocolError as e:
--> 754 raise ChunkedEncodingError(e)
755 except DecodeError as e:
756 raise ContentDecodingError(e)
ChunkedEncodingError: ("Connection broken: ConnectionResetError(10054, 'Eine vorhandene Verbindung wurde vom Remotehost geschlossen', None, 10054, None)", ConnectionResetError(10054, 'Eine vorhandene Verbindung wurde vom Remotehost geschlossen', None, 10054, None))
```
I would be very thankful if someone could help me out.
Created by Vanessa Binöder (vanessabinoeder)

Hi Vanessa, the initial error that caused the Python client errors was the network connection being reset:
> ConnectionResetError: [WinError 10054] Eine vorhandene Verbindung wurde vom Remotehost geschlossen
That error translates to "An existing connection was closed by the remote host". It could indicate a broader issue with the Synapse backend, but network errors like this can also happen routinely, especially with long-running jobs, which present more opportunities for network issues to arise.
If you were using the `syn.downloadTableColumns` method (the same one used in "Accessing the data"), the data you've downloaded so far is still cached; it's just a bit hard to find, because by default the cache lives in a hidden directory under your home directory.
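For reference, here is a quick way to check where the client is caching files, or to point it somewhere easier to find. This is only a sketch: it assumes the default `~/.synapseCache` location and the `cache_root_dir` constructor argument, so please verify both against the documentation for your `synapseclient` version.
```
import os
import synapseclient

# Default cache location used by the Synapse Python client (a hidden directory in your home directory).
default_cache = os.path.expanduser("~/.synapseCache")
print(default_cache, "exists:", os.path.exists(default_cache))

# Optionally redirect the cache to a more visible folder (argument name assumed; check your client version).
syn = synapseclient.Synapse(cache_root_dir=os.path.expanduser("~/synapse_cache"))
syn.login()
```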
If you re-run the same code, the client will recognize that you've already downloaded some of the data, and will continue downloading where it left off before the network error.
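In practice that means you can simply re-run the snippet from "Accessing the data". If you want to ride out further transient connection resets automatically, you could wrap the download in a simple retry loop like the sketch below. The table ID and column name are taken from your traceback; the retry logic itself is not part of the client and is only one way to do it.
```
import time
import synapseclient

syn = synapseclient.Synapse()
syn.login()

sensor_measurements_query = syn.tableQuery("select * from syn20681931")

# Retry the bulk download a few times; files already in the local cache are not downloaded again.
for attempt in range(5):
    try:
        sensor_measurements_paths = syn.downloadTableColumns(
            sensor_measurements_query, "data_file_handle_id"
        )
        break
    except Exception as error:  # e.g. ChunkedEncodingError from a dropped connection
        print(f"Download interrupted ({error!r}); retrying in 30 s...")
        time.sleep(30)
else:
    raise RuntimeError("Download did not complete after 5 attempts")

sensor_measurements = sensor_measurements_query.asDataFrame()  # pandas DataFrame
sensor_measurements["path"] = sensor_measurements.data_file_handle_id.astype(str).map(
    sensor_measurements_paths
)
```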