--- /srv/rebuilderd/tmp/rebuilderdNrZLsS/inputs/python3-pydataverse_0.3.4-1_all.deb +++ /srv/rebuilderd/tmp/rebuilderdNrZLsS/out/python3-pydataverse_0.3.4-1_all.deb ├── file list │ @@ -1,3 +1,3 @@ │ -rw-r--r-- 0 0 0 4 2024-09-23 07:05:56.000000 debian-binary │ --rw-r--r-- 0 0 0 3288 2024-09-23 07:05:56.000000 control.tar.xz │ --rw-r--r-- 0 0 0 126224 2024-09-23 07:05:56.000000 data.tar.xz │ +-rw-r--r-- 0 0 0 3292 2024-09-23 07:05:56.000000 control.tar.xz │ +-rw-r--r-- 0 0 0 126168 2024-09-23 07:05:56.000000 data.tar.xz ├── control.tar.xz │ ├── control.tar │ │ ├── ./control │ │ │ @@ -1,13 +1,13 @@ │ │ │ Package: python3-pydataverse │ │ │ Source: pydataverse │ │ │ Version: 0.3.4-1 │ │ │ Architecture: all │ │ │ Maintainer: Debian Python Team │ │ │ -Installed-Size: 748 │ │ │ +Installed-Size: 746 │ │ │ Depends: python3-httpx, python3-jsonschema, python3:any, libjs-sphinxdoc (>= 7.4) │ │ │ Built-Using: alabaster (= 0.7.16-0.1), sphinx (= 7.4.7-3) │ │ │ Section: python │ │ │ Priority: optional │ │ │ Homepage: https://github.com/gdcc/pyDataverse │ │ │ Description: Python module for interacting with Dataverse APIs │ │ │ pyDataverse provides a comprehensive Python interface for interacting with │ │ ├── ./md5sums │ │ │ ├── ./md5sums │ │ │ │┄ Files differ ├── data.tar.xz │ ├── data.tar │ │ ├── file list │ │ │ @@ -81,17 +81,17 @@ │ │ │ -rw-r--r-- 0 root (0) root (0) 4057 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/search.html │ │ │ -rw-r--r-- 0 root (0) root (0) 30577 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/searchindex.js │ │ │ drwxr-xr-x 0 root (0) root (0) 0 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/snippets/ │ │ │ -rw-r--r-- 0 root (0) root (0) 4360 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/snippets/pip-install.html │ │ │ -rw-r--r-- 0 root (0) root (0) 4727 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/snippets/requirements.html │ │ │ -rw-r--r-- 0 root (0) root (0) 4349 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/snippets/warning_production.html │ │ │ drwxr-xr-x 0 root (0) root (0) 0 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/user/ │ │ │ --rw-r--r-- 0 root (0) root (0) 24601 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/user/advanced-usage.html │ │ │ --rw-r--r-- 0 root (0) root (0) 43662 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/user/basic-usage.html │ │ │ --rw-r--r-- 0 root (0) root (0) 16527 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/user/csv-templates.html │ │ │ +-rw-r--r-- 0 root (0) root (0) 24001 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/user/advanced-usage.html │ │ │ +-rw-r--r-- 0 root (0) root (0) 43422 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/user/basic-usage.html │ │ │ +-rw-r--r-- 0 root (0) root (0) 16287 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/user/csv-templates.html │ │ │ -rw-r--r-- 0 root (0) root (0) 6473 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/user/faq.html │ │ │ -rw-r--r-- 0 root (0) root (0) 10287 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/user/installation.html │ │ │ -rw-r--r-- 0 root (0) root (0) 7068 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/user/resources.html │ │ │ -rw-r--r-- 0 root (0) root (0) 11300 2024-09-23 07:05:56.000000 
./usr/share/doc/python3-pydataverse/html/user/use-cases.html │ │ │ drwxr-xr-x 0 root (0) root (0) 0 2024-09-23 07:05:56.000000 ./usr/share/doc-base/ │ │ │ -rw-r--r-- 0 root (0) root (0) 227 2024-09-23 07:03:07.000000 ./usr/share/doc-base/python3-pydataverse.pydataverse │ │ │ lrwxrwxrwx 0 root (0) root (0) 0 2024-09-23 07:05:56.000000 ./usr/share/doc/python3-pydataverse/html/_static/doctools.js -> ../../../../javascript/sphinxdoc/1.0/doctools.js │ │ ├── ./usr/share/doc/python3-pydataverse/html/user/advanced-usage.html │ │ │ @@ -80,32 +80,32 @@ │ │ │

Add datafiles

Add the files you have filled in the org.filename cell in datafiles.csv and then place them in the root directory (or any other specified directory).

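As a quick check (a hypothetical snippet, not part of the original guide; it assumes datafiles.csv sits in the current working directory and references the files relative to it), you can verify that every file named in the org.filename column is in place before uploading:

>>> import csv
>>> import os
>>> with open("datafiles.csv", newline="") as f:
>>>     rows = list(csv.DictReader(f))
>>> missing = [r["org.filename"] for r in rows if not os.path.isfile(r["org.filename"])]
>>> missing
[]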

Import CSV files


Import the CSV files with read_csv_as_dicts(). This creates a list of dict’s, automatically imports the Dataverse Software’s own metadata attribute (dv. prefix), converts boolean values, and loads JSON cells properly.

>>> import os
>>> from pyDataverse.utils import read_csv_as_dicts
>>> csv_datasets_filename = "datasets.csv"
>>> ds_data = read_csv_as_dicts(csv_datasets_filename)
>>> csv_datafiles_filename = "datafiles.csv"
>>> df_data = read_csv_as_dicts(csv_datafiles_filename)

Once we have the data in Python, we can easily import the data into pyDataverse.

For this, loop over each Dataset dict, to:

  1. Instantiate an empty Dataset
  2. add the data with set() and
  3. append the instance to a list.

>>> from pyDataverse.models import Dataset
>>> ds_lst = []
>>> for ds in ds_data:
>>>     ds_obj = Dataset()
>>>     ds_obj.set(ds)
>>>     ds_lst.append(ds_obj)

│ │ │ @@ -134,15 +134,15 @@

  • BASE_URL: Base URL of your Dataverse installation, without trailing slash (e.g. https://data.aussda.at)
  • API_TOKEN: API token of a Dataverse user with proper rights to create a Dataset and upload Datafiles

>>> from pyDataverse.api import NativeApi
>>> api = NativeApi(BASE_URL, API_TOKEN)

Loop over the list of Dataset’s, upload the metadata with create_dataset() and collect all dataset_id’s and pid’s in dataset_id_2_pid.

Note: The Dataverse collection assigned to dv_alias must be published in order to add a Dataset to it.

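If the collection is still an unpublished draft, it can be published through the same API object first. A minimal sketch, assuming the API token has publishing rights (publish_dataverse() is NativeApi’s call for this; the response value shown is illustrative):

>>> resp = api.publish_dataverse(":root:")  # same alias as dv_alias below
>>> resp.json()["status"]
'OK'

With the collection published, create the Datasets:
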
>>> dv_alias = ":root:"
>>> dataset_id_2_pid = {}
>>> for ds in ds_lst:
>>>     resp = api.create_dataset(dv_alias, ds.json())
>>>     dataset_id_2_pid[ds.get()["org.dataset_id"]] = resp.json()["data"]["persistentId"]
Dataset with pid 'doi:10.5072/FK2/WVMDFE' created.

The API requests always return a httpx.Response object, which can then be used to extract the data.
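
For example, reusing the PID from the output above (a minimal sketch; it assumes NativeApi’s get_dataset() retrieval call, and the values shown are illustrative):

>>> resp = api.get_dataset("doi:10.5072/FK2/WVMDFE")
>>> resp.status_code
200
>>> resp.json()["status"]
'OK'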

Next, we’ll do the same for the list of Datafile’s with upload_datafile(). In addition to the metadata, the PID (Persistent Identifier, which is mostly the DOI) and the filename must be passed.

>>> for df in df_lst:
>>>     pid = dataset_id_2_pid[df.get()["org.dataset_id"]]
>>>     filename = os.path.join(os.getcwd(), df.get()["org.filename"])
>>>     df.set({"pid": pid, "filename": filename})
>>>     resp = api.upload_datafile(pid, filename, df.json())
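
Once all Datafiles are uploaded, the Dataset drafts can be released. A minimal sketch, assuming publishing rights (publish_dataset() and its release_type parameter are assumed from NativeApi, not taken from this walkthrough):

>>> for pid in dataset_id_2_pid.values():
>>>     resp = api.publish_dataset(pid, release_type="major")
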
    │ │ │ ├── html2text {}
    │ │ │ │ @@ -35,29 +35,29 @@
    │ │ │ │  No matter what you choose, you have to have properly formatted CSV files
    │ │ │ │  (datasets.csv and datafiles.csv) before moving on.
    │ │ │ │  Don’t forget: Some columns must be entered in a JSON format!
    │ │ │ │  ******** Add datafiles ********
    │ │ │ │  Add the files you have filled in the org.filename cell in datafiles.csv and
    │ │ │ │  then place them in the root directory (or any other specified directory).
    │ │ │ │  ******** Import CSV files ********
    │ │ │ │ -Import the CSV files with read_csv_as_dicts(). This creates a list of _d_i_c_t’s,
    │ │ │ │ +Import the CSV files with read_csv_as_dicts(). This creates a list of dict’s,
    │ │ │ │  automatically imports the Dataverse Software’s own metadata attribute (dv.
    │ │ │ │  prefix), converts boolean values, and loads JSON cells properly.
    │ │ │ │  >>> import os
    │ │ │ │  >>> from pyDataverse.utils import read_csv_as_dicts
    │ │ │ │  >>> csv_datasets_filename = "datasets.csv"
    │ │ │ │  >>> ds_data = read_csv_as_dicts(csv_datasets_filename)
    │ │ │ │  >>> csv_datafiles_filename = "datafiles.csv"
    │ │ │ │  >>> df_data = read_csv_as_dicts(csv_datafiles_filename)
    │ │ │ │  Once we have the data in Python, we can easily import the data into
    │ │ │ │  pyDataverse.
    │ │ │ │ -For this, loop over each Dataset _d_i_c_t, to:
    │ │ │ │ +For this, loop over each Dataset dict, to:
    │ │ │ │     1. Instantiate an empty Dataset
    │ │ │ │     2. add the data with set() and
    │ │ │ │ -   3. append the instance to a _l_i_s_t.
    │ │ │ │ +   3. append the instance to a list.
    │ │ │ │  >>> from pyDataverse.models import Dataset
    │ │ │ │  >>> ds_lst = []
    │ │ │ │  >>> for ds in ds_data:
    │ │ │ │  >>>     ds_obj = Dataset()
    │ │ │ │  >>>     ds_obj.set(ds)
    │ │ │ │  >>>     ds_lst.append(ds_obj)
    │ │ │ │  To import the Datafile’s, do the same with df_data: set() the Datafile
    │ │ │ │ @@ -74,28 +74,28 @@
    │ │ │ │  Dataverse installation’s data before executing the lines:
    │ │ │ │      * BASE_URL: Base URL of your Dataverse installation, without trailing slash
    │ │ │ │        (e. g. https://data.aussda.at))
    │ │ │ │      * API_TOKEN: API token of a Dataverse user with proper rights to create a
    │ │ │ │        Dataset and upload Datafiles
    │ │ │ │  >>> from pyDataverse.api import NativeApi
    │ │ │ │  >>> api = NativeApi(BASE_URL, API_TOKEN)
    │ │ │ │ -Loop over the _l_i_s_t of Dataset’s, upload the metadata with create_dataset() and
    │ │ │ │ +Loop over the list of Dataset’s, upload the metadata with create_dataset() and
    │ │ │ │  collect all dataset_id’s and pid’s in dataset_id_2_pid.
    │ │ │ │  Note: The Dataverse collection assigned to dv_alias must be published in order
    │ │ │ │  to add a Dataset to it.
    │ │ │ │  >>> dv_alias = ":root:"
    │ │ │ │  >>> dataset_id_2_pid = {}
    │ │ │ │  >>> for ds in ds_lst:
    │ │ │ │  >>>     resp = api.create_dataset(dv_alias, ds.json())
    │ │ │ │  >>>     dataset_id_2_pid[ds.get()["org.dataset_id"]] = resp.json()["data"]
    │ │ │ │  ["persistentId"]
    │ │ │ │  Dataset with pid 'doi:10.5072/FK2/WVMDFE' created.
    │ │ │ │  The API requests always return a httpx.Response object, which can then be used
    │ │ │ │  to extract the data.
    │ │ │ │ -Next, we’ll do the same for the _l_i_s_t of Datafile’s with upload_datafile(). In
    │ │ │ │ +Next, we’ll do the same for the list of Datafile’s with upload_datafile(). In
    │ │ │ │  addition to the metadata, the PID (Persistent Identifier, which is mostly the
    │ │ │ │  DOI) and the filename must be passed.
    │ │ │ │  >>> for df in df_lst:
    │ │ │ │  >>>     pid = dataset_id_2_pid[df.get()["org.dataset_id"]]
    │ │ │ │  >>>     filename = os.path.join(os.getcwd(), df.get()["org.filename"])
    │ │ │ │  >>>     df.set({"pid": pid, "filename": filename})
    │ │ │ │  >>>     resp = api.upload_datafile(pid, filename, df.json())
    │ │ ├── ./usr/share/doc/python3-pydataverse/html/user/basic-usage.html
│ │ │ @@ -179,15 +179,15 @@

Now, as the metadata is imported, we don’t know if the data is valid and can be used to create a Dataset. Maybe some attributes are missing or misnamed, or a mistake during import happened. pyDataverse offers a convenient function to test this out with validate_json(), so you can move on with confidence:

>>> ds.validate_json()
True


Adding or updating data manually is easy. With set() you can pass any attribute you want as a collection of key-value pairs in a dict:

>>> ds.get()["title"]
Youth in Austria 2005
>>> ds.set({"title": "Youth from Austria 2005"})
>>> ds.get()["title"]
Youth from Austria 2005

To upload the Dataset, use create_dataset(). You’ll pass the Dataverse collection where the Dataset should be attached and include the metadata as a JSON string (json()):

>>> resp = api.create_dataset("pyDataverse_user-guide", ds.json())
Dataset with pid 'doi:10.5072/FK2/EO7BNB' created.
>>> resp.json()
{'status': 'OK', 'data': {'id': 442, 'persistentId': 'doi:10.5072/FK2/EO7BNB'}}

Save the created PID (short for Persistent Identifier, which in our case is the DOI) in a dict:

>>> ds_pid = resp.json()["data"]["persistentId"]

Private Dataset URL’s can also be created. Use create_dataset_private_url() to get the URL and the private token:

>>> resp = api.create_dataset_private_url(ds_pid)
Dataset private URL created: http://data.aussda.at/privateurl.xhtml?token={PRIVATE_TOKEN}
    │ │ │ ├── html2text {}
    │ │ │ │ @@ -115,29 +115,29 @@
    │ │ │ │  Now, as the metadata is imported, we don’t know if the data is valid and can be
    │ │ │ │  used to create a Dataset. Maybe some attributes are missing or misnamed, or a
    │ │ │ │  mistake during import happened. pyDataverse offers a convenient function to
    │ │ │ │  test this out with validate_json(), so you can move on with confidence:
    │ │ │ │  >>> ds.validate_json()
    │ │ │ │  True
    │ │ │ │  Adding or updating data manually is easy. With set() you can pass any attribute
    │ │ │ │ -you want as a collection of key-value pairs in a _d_i_c_t:
    │ │ │ │ +you want as a collection of key-value pairs in a dict:
    │ │ │ │  >>> ds.get()["title"]
    │ │ │ │  Youth in Austria 2005
    │ │ │ │  >>> ds.set({"title": "Youth from Austria 2005"})
    │ │ │ │  >>> ds.get()["title"]
    │ │ │ │  Youth from Austria 2005
    │ │ │ │  To upload the Dataset, use create_dataset(). You’ll pass the Dataverse
    │ │ │ │  collection where the Dataset should be attached and include the metadata as a
    │ │ │ │  JSON string (json()):
    │ │ │ │  >>> resp = api.create_dataset("pyDataverse_user-guide", ds.json())
    │ │ │ │  Dataset with pid 'doi:10.5072/FK2/EO7BNB' created.
    │ │ │ │  >>> resp.json()
    │ │ │ │  {'status': 'OK', 'data': {'id': 442, 'persistentId': 'doi:10.5072/FK2/EO7BNB'}}
    │ │ │ │  Save the created PID (short for Persistent Identifier, which in our case is the
    │ │ │ │ -DOI) in a _d_i_c_t:
    │ │ │ │ +DOI) in a dict:
    │ │ │ │  >>> ds_pid = resp.json()["data"]["persistentId"]
    │ │ │ │  Private Dataset URL’s can also be created. Use create_dataset_private_url() to
    │ │ │ │  get the URL and the private token:
    │ │ │ │  >>> resp = api.create_dataset_private_url(ds_pid)
    │ │ │ │  Dataset private URL created: http://data.aussda.at/privateurl.xhtml?token=
    │ │ │ │  {PRIVATE_TOKEN}
    │ │ │ │  >>> resp.json()
    │ │ ├── ./usr/share/doc/python3-pydataverse/html/user/csv-templates.html
    │ │ │ @@ -115,16 +115,16 @@

Export from pyDataverse

If you want to export your metadata from a pyDataverse object (Dataverse, Dataset, Datafile) to a CSV file:

  1. Get the metadata as dict (Dataverse.get(), Dataset.get() or Datafile.get()).
  2. Pass the dict to write_dicts_as_csv(), as sketched below. Note: Use the internal attribute lists from pyDataverse.models to get a complete list of fieldnames for each Dataverse data-type (e.g. Dataset.__attr_import_dv_up_citation_fields_values).

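A minimal sketch of that export, assuming the utility keeps the argument order write_dicts_as_csv(data, fieldnames, filename) and using the dict’s own keys as fieldnames (the internal attribute lists mentioned above give fuller coverage):

>>> from pyDataverse.models import Dataset
>>> from pyDataverse.utils import write_dicts_as_csv
>>> ds = Dataset()
>>> ds.set({"title": "Youth in Austria 2005"})
>>> data = [ds.get()]
>>> fieldnames = list(data[0].keys())
>>> write_dicts_as_csv(data, fieldnames, "datasets_export.csv")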

    Resources
