Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Token Object Contracts for policy-based usage of Hugging Face Models #57

Open
wants to merge 5 commits into
base: cf_policy_for_hf_endpoints
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ PROJECT(pdo-contracts)
LIST(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
LIST(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/exchange-contract")
LIST(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/digital-asset-contract")
LIST(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/inference-contract")
LIST(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/hfmodels-contract")
INCLUDE(ProjectVariables)

LIST(APPEND CMAKE_MODULE_PATH "${PDO_SOURCE_ROOT}/contracts/wawaka")
Expand All @@ -27,7 +29,7 @@ INCLUDE(wawaka_common)
LIST(APPEND WASM_LIBRARIES ${WW_COMMON_LIB})
LIST(APPEND WASM_INCLUDES ${WW_COMMON_INCLUDES})

SET(CONTRACT_FAMILIES exchange-contract digital-asset-contract inference-contract)
SET(CONTRACT_FAMILIES exchange-contract digital-asset-contract inference-contract hfmodels-contract)

# A local cmake file (Local.cmake) allows for local overrides of
# variables. In particular, this is useful to set CONTRACT_FAMILIES
Expand Down
3 changes: 2 additions & 1 deletion exchange-contract/pdo/exchange/plugins/token_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,8 @@ def mint_one_token(cls, state, to_context, ti_context, dg_context, ledger_submit
state, to_context, to_session,
ledger_key,
to_package,
authority)
authority,
**kwargs)
return to_save_file

@classmethod
Expand Down
37 changes: 37 additions & 0 deletions hfmodels-contract/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build rules for the hfmodels contract family: a static support
# library (named by HFMODELS_LIB), the hfmodels_token_object contract,
# and the Python wheel that packages the contract.

# This is necessary to get at the definitions necessary
# for the std::string class
# NOTE(review): exchange_common is a module from the exchange contract
# family, located through CMAKE_MODULE_PATH. It is expected to define
# EXCHANGE_LIB and EXCHANGE_INCLUDES, which are appended to the lists
# used for contract builds -- confirm the std::string remark above
# still describes why it is included here.
INCLUDE(exchange_common)
LIST(APPEND WASM_LIBRARIES ${EXCHANGE_LIB})
LIST(APPEND WASM_INCLUDES ${EXCHANGE_INCLUDES})


# Pull in the definitions for this contract family and add its library
# and include directories to the same lists.
# NOTE(review): HFMODELS_LIB, HFMODELS_INCLUDES and HFMODELS_SOURCES are
# presumably set by hfmodels_common.cmake -- verify against that file.
INCLUDE(hfmodels_common.cmake)
LIST(APPEND WASM_LIBRARIES ${HFMODELS_LIB})
LIST(APPEND WASM_INCLUDES ${HFMODELS_INCLUDES})

# Static library holding the code shared by the hfmodels contracts; the
# include directories are PUBLIC so they propagate to anything that
# links against the library.
ADD_LIBRARY(${HFMODELS_LIB} STATIC ${HFMODELS_SOURCES})
TARGET_INCLUDE_DIRECTORIES(${HFMODELS_LIB} PUBLIC ${HFMODELS_INCLUDES})

# Apply the WASM compile and link options to the library. The library is
# excluded from the default "all" target, so it is only built when
# another target that is being built depends on it.
SET_PROPERTY(TARGET ${HFMODELS_LIB} APPEND_STRING PROPERTY COMPILE_OPTIONS "${WASM_BUILD_OPTIONS}")
SET_PROPERTY(TARGET ${HFMODELS_LIB} APPEND_STRING PROPERTY LINK_OPTIONS "${WASM_LINK_OPTIONS}")
SET_TARGET_PROPERTIES(${HFMODELS_LIB} PROPERTIES EXCLUDE_FROM_ALL TRUE)

# Build the token object contract from its single source file.
# NOTE(review): BUILD_CONTRACT is defined outside this file; it
# presumably consumes WASM_LIBRARIES and WASM_INCLUDES -- confirm.
BUILD_CONTRACT(hfmodels_token_object contracts/token_object.cpp)

# -----------------------------------------------------------------
# Package the family as a Python wheel.
# NOTE(review): BUILD_WHEEL comes from the Python module; the first
# argument looks like the package name and the second the contract
# the wheel depends on -- confirm against the macro definition.
# -----------------------------------------------------------------
INCLUDE(Python)
BUILD_WHEEL(hfmodels hfmodels_token_object)
32 changes: 32 additions & 0 deletions hfmodels-contract/MANIFEST
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
MANIFEST.in
./setup.py
./pdo/hfmodels/wsgi/provision_token_issuer.py
./pdo/hfmodels/wsgi/__init__.py
./pdo/hfmodels/wsgi/add_endpoint.py
./pdo/hfmodels/wsgi/provision_token_object.py
./pdo/hfmodels/wsgi/info.py
./pdo/hfmodels/wsgi/process_capability.py
./pdo/hfmodels/operations/use_hfmodel.py
./pdo/hfmodels/operations/__init__.py
./pdo/hfmodels/plugins/hfmodels_token_object.py
./pdo/hfmodels/plugins/__init__.py
./pdo/hfmodels/plugins/hfmodels_guardian.py
./pdo/hfmodels/__init__.py
./pdo/hfmodels/resources/resources.py
./pdo/hfmodels/resources/__init__.py
./pdo/hfmodels/common/guardian_service.py
./pdo/hfmodels/common/capability_keystore.py
./pdo/hfmodels/common/secrets.py
./pdo/hfmodels/common/__init__.py
./pdo/hfmodels/common/endpoint_registry.py
./pdo/hfmodels/common/utility.py
./pdo/hfmodels/common/capability_keys.py
./pdo/hfmodels/scripts/__init__.py
./pdo/hfmodels/scripts/guardianCLI.py
./pdo/hfmodels/scripts/scripts.py
./scripts/gs_stop.sh
./scripts/gs_start.sh
./scripts/gs_status.sh
./context/tokens.toml
./etc/hfmodels.toml
./etc/guardian_service.toml
4 changes: 4 additions & 0 deletions hfmodels-contract/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
recursive-include ../build/hfmodels-contract *.b64
recursive-include etc *.toml
recursive-include context *.toml
recursive-include scripts *.sh
61 changes: 61 additions & 0 deletions hfmodels-contract/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
<!---
Licensed under Creative Commons Attribution 4.0 International License
https://creativecommons.org/licenses/by/4.0/
--->

# PDO Contracts for tokenization and policy based access of Hugging Face models #

This directory contains a Private Data Objects contract family for
creating a confidentiality preserving policy-wrapper around the usage
of (possibly private) machine learning (ML) models hosted on Hugging Face
(`https://huggingface.co/models`).


## Problem Statement and Solution Overview
The majority of models shared via Hugging Face today are open-source.
For models that are private, or whose access needs to be controlled, Hugging Face
provides the following options:

1. The model repository and its deployment could be kept entirely private
(`https://huggingface.co/docs/hub/en/repositories-getting-started#creating-a-repository`)
in which case only the model owner (personal model) or members of the organization
(organization model) can see and access the deployment.

2. `Gated Models` (`https://huggingface.co/docs/hub/models-gated`). Under gated models, prospective
users must provide basic information about themselves, and in addition should provide additional
information set by the model owner. Once approved by the model owner, the approved user gets access
to the model including the repository.

However, the currently available solutions above do not permit a model owner to set fine-grained policies
for access control of models whose raw bytes must otherwise be kept confidential. In this PoC, we provide
a solution for how policy-controlled access to "private" models hosted on Hugging Face can be provided
to any third party whose use case satisfies the terms of the policy for using the model. At a high level,
the solution works as follows:

1. The model owner deploys the model as a private/gated model on Hugging Face. It is assumed that the model is available
for inference via Serverless API (`https://huggingface.co/docs/api-inference/index`) given the model owner's authentication tokens. (We haven't evaluated the PoC for Hugging Face managed inference endpoints (`https://huggingface.co/docs/inference-endpoints/index`) )

2. The model owner deploys PDO Token Object smart contract that encodes policies for usage of the model via Serverless Inference APIs. Currently, the PoC builds a generic token object which can be configured with the following information a. model owner authentication token b. REST API URL. Both of these are considered confidential information, and never exposed outside the PDO contract. The rest of the information is available for a prospective user of the asset c. Fixed Model parameters d. User input schema e. model/usage description f. max use count, referring to the maximum number of times model use capability packages can be obtained from the token object.

3. The model owner transfers ownership of the PDO token object to a new user. New user submits model use requests to the token
object, obtains capability packages, and submits to the guardian web-server that acts as a bridge between the PDO token object, and the Hugging Face serverless API endpoint. Like the token object, the guardian web-server is generic.
We have programmed the guardian to check the schema of the user inputs against the schema set by the model owner as part of the token object params; otherwise the guardian functionality is model agnostic. One limitation of this approach is that
if privacy preserving input pre-processing or output postprocessing needs to be carried out (which might be natural to expect
given that the problem assumes the model itself is private), the current PoC needs additional enhancements.

4. The guardian invokes the REST API call to the serverless inference endpoint, obtains the result, and returns it to the token owner. We have tested the PoC using some of the examples provided at `https://huggingface.co/docs/api-inference/detailed_parameters`.

## Additional Details about the Solution

At its core, the solution leverages the token-guardian protocol and its implementation contained within the [Exchange Contracts](../exchange-contract/README.md) family for policy-based use of high-value, possibly confidential assets. The current PoC does not employ TEEs for the guardian, however; for a more secure PoC, the token-guardian protocol permits usage of TEEs for the guardian web-server, and bi-directional attestation between the PDO token object and the guardian server.

Additionally, this PoC is conceptually similar to the [inference-contract](../inference-contract/README.md) PoC, where we showed how to create PDO token objects/guardians for policy-based usage of high-value ML models deployed for inferencing via the OpenVINO model server. The major difference is that OpenVINO currently does not provide a hosted inferencing solution, so it is up to the model owner to deploy the OpenVINO model server that hosts the model, and also to prove that the hosted solution respects usage of the inferencing data. In the current Hugging Face use-case, the model owner does not manage the inferencing infrastructure, but simply relies on solutions provided by Hugging Face. A detailed comparison of the privacy-preserving properties of the two PoCs, for both the model owner and the inferencing user, is doable based on interest from the community.


## Testing the PoC

The [test script](./test/script_test.sh) uses the open source `https://api-inference.huggingface.co/models/openai-community/gpt2` model available on Hugging Face via serverless APIs. In order to run the test script, please obtain a user access token
`https://huggingface.co/settings/tokens` that is necessary to make REST API calls to serverless API endpoints. Set the environment variable `HF_AUTH_TOKEN` with the token value. For the test, the assumption is that the person who executes the test script owns the model; the token object will be configured with the person's token. The test will transfer token ownership to a fictitious `token_holder` and let the token_holder use the model subject to policies in the token object.

We note that the test has not been integrated with the automated test suite for the contracts repo due to the dependency on the HF access token to run the test for this PoC.

54 changes: 54 additions & 0 deletions hfmodels-contract/context/tokens.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# -----------------------------------------------------------------
# token ${token}
# -----------------------------------------------------------------
[token.${token}.asset_type]
module = "pdo.exchange.plugins.asset_type"
identity = "token_type"
source = "${ContractFamily.Exchange.asset_type.source}"
name = "${token}"
description = "asset type for ${token} token objects"
link = "http://"

[token.${token}.vetting]
module = "pdo.exchange.plugins.vetting"
identity = "token_vetting"
source = "${ContractFamily.Exchange.vetting.source}"
asset_type_context = "@{..asset_type}"

[token.${token}.guardian]
module = "pdo.hfmodels.plugins.hfmodels_guardian"
url = "${url}"
identity = "${..token_issuer.identity}"
token_issuer_context = "@{..token_issuer}"
service_only = true

[token.${token}.token_issuer]
module = "pdo.exchange.plugins.token_issuer"
identity = "token_issuer"
source = "${ContractFamily.Exchange.token_issuer.source}"
token_object_context = "@{..token_object}"
vetting_context = "@{..vetting}"
guardian_context = "@{..guardian}"
description = "issuer for token ${token}"
count = 1

[token.${token}.token_object]
module = "pdo.hfmodels.plugins.hfmodels_token_object"
identity = "${..token_issuer.identity}"
source = "${ContractFamily.hfmodels.token_object.source}"
token_issuer_context = "@{..token_issuer}"
data_guardian_context = "@{..guardian}"
78 changes: 78 additions & 0 deletions hfmodels-contract/contracts/token_object.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/* Copyright 2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <string>
#include <stddef.h>
#include <stdint.h>

#include "Dispatch.h"

#include "Cryptography.h"
#include "KeyValue.h"
#include "Environment.h"
#include "Message.h"
#include "Response.h"
#include "Types.h"
#include "Util.h"
#include "Value.h"
#include "WasmExtensions.h"

#include "contract/base.h"
#include "contract/attestation.h"
#include "exchange/issuer_authority_base.h"
#include "exchange/token_object.h"
#include "hfmodels/token_object.h"

// -----------------------------------------------------------------
// METHOD: initialize_contract
//
// Contract initialization entry point. Delegates to the exchange
// family's token_object::initialize_contract and reports the outcome
// through the response object.
//
// env -- environment passed through to the base initializer
// rsp -- response object that receives the result
//
// Returns the value of rsp.success(true) once the base initializer
// has succeeded.
// NOTE(review): ASSERT_SUCCESS is defined outside this file; it
// presumably marks the response as failed with the given message and
// returns early when the condition is false -- confirm.
// -----------------------------------------------------------------
bool initialize_contract(const Environment& env, Response& rsp)
{
ASSERT_SUCCESS(rsp, ww::exchange::token_object::initialize_contract(env),
"failed to initialize the base contract");

return rsp.success(true);
}

// -----------------------------------------------------------------
// Dispatch table for the hfmodels token object contract.
//
// Each entry pairs a method name with the function that implements
// it. Only initialize, get_model_info, use_model and get_capability
// come from the hfmodels family; the remaining methods are taken from
// the base contract, attestation and exchange implementations. The
// table ends with a { NULL, NULL } entry.
// NOTE(review): CONTRACT_METHOD2 is defined outside this file;
// presumably it builds the entry from the name and the handler, and
// the dispatcher treats the NULL entry as the end marker -- confirm.
// -----------------------------------------------------------------
contract_method_reference_t contract_method_dispatch_table[] = {

CONTRACT_METHOD2(get_verifying_key, ww::contract::base::get_verifying_key),
CONTRACT_METHOD2(initialize, ww::hfmodels::token_object::initialize),

// issuer methods
CONTRACT_METHOD2(get_asset_type_identifier, ww::exchange::issuer_authority_base::get_asset_type_identifier),
CONTRACT_METHOD2(get_issuer_authority, ww::exchange::issuer_authority_base::get_issuer_authority),
CONTRACT_METHOD2(get_authority, ww::exchange::issuer_authority_base::get_authority),

// from the attestation contract
CONTRACT_METHOD2(get_contract_metadata, ww::contract::attestation::get_contract_metadata),
CONTRACT_METHOD2(get_contract_code_metadata, ww::contract::attestation::get_contract_code_metadata),

// use the asset
CONTRACT_METHOD2(get_model_info, ww::hfmodels::token_object::get_model_info),
CONTRACT_METHOD2(use_model, ww::hfmodels::token_object::use_model),
CONTRACT_METHOD2(get_capability, ww::hfmodels::token_object::get_capability),

// object transfer, escrow & claim methods
CONTRACT_METHOD2(transfer,ww::exchange::token_object::transfer),
CONTRACT_METHOD2(escrow,ww::exchange::token_object::escrow),
CONTRACT_METHOD2(escrow_attestation,ww::exchange::token_object::escrow_attestation),
CONTRACT_METHOD2(release,ww::exchange::token_object::release),
CONTRACT_METHOD2(claim,ww::exchange::token_object::claim),

// end-of-table marker
{ NULL, NULL }
};
69 changes: 69 additions & 0 deletions hfmodels-contract/etc/guardian_service.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Copyright 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# --------------------------------------------------
# GuardianService -- general information about the guardian service
# --------------------------------------------------
[GuardianService]
# Identity is a string used to identify the service in log files
Identity = "${identity}"
HttpPort = 7900
Host = "${host}"

# --------------------------------------------------
# StorageService -- information about passing kv stores
# --------------------------------------------------
[StorageService]
URL = "http://${host}:7901"
KeyValueStore = "${data}/guardian_service.mdb"
BlockStore = "${data}/guardian_service.mdb"
Identity = "${identity}"
HttpPort = 7901
Host = "${host}"
GarbageCollectionInterval = 0
MaxDuration = 0

# --------------------------------------------------
# Keys -- configuration for retrieving service keys
# --------------------------------------------------
[Key]
SearchPath = [ ".", "./keys", "${keys}" ]
FileName = "${identity}_private.pem"

# --------------------------------------------------
# Logging -- configuration of service logging
# --------------------------------------------------
[Logging]
LogLevel = "INFO"
LogFile = "${logs}/${identity}.log"

# --------------------------------------------------
# Data -- names for the various databases
# --------------------------------------------------
[Data]
EndpointRegistry = "${data}/endpoints.db"
CapabilityKeyStore = "${data}/keystore.db"

# --------------------------------------------------
# TokenIssuer -- configuration for TI verification
# --------------------------------------------------
[TokenIssuer]
LedgerKey = ""
CodeHash = ""
ContractIDs = []

# --------------------------------------------------
# TokenObject -- configuration for TO verification
# --------------------------------------------------
[TokenObject]
19 changes: 19 additions & 0 deletions hfmodels-contract/etc/hfmodels.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# -----------------------------------------------------------------
# HFMODELS family contract source
# -----------------------------------------------------------------
[ContractFamily.hfmodels]
token_object = { source = "${home}/contracts/hfmodels/_hfmodels_token_object.b64" }
Loading