@@ -62,14 +62,15 @@ def generate_s3_file_path(
6262 user_name : str ,
6363 repo_name : str ,
6464 commit : str ,
65+ subpath : str ,
6566 include_patterns : set [str ] | None ,
6667 ignore_patterns : set [str ],
6768) -> str :
6869 """Generate S3 file path with proper naming convention.
6970
7071 The file path is formatted as:
7172 [<S3_DIRECTORY_PREFIX>/]ingest/<provider>/<repo-owner>/<repo-name>/<commit-ID>/
72- <exclude&include hash>/<owner>-<repo-name>.txt
73+ <exclude&include hash>/<owner>-<repo-name>-<subpath-hash>.txt
7374
7475 If S3_DIRECTORY_PREFIX environment variable is set, it will be prefixed to the path.
7576 The commit-ID is always included in the URL.
@@ -85,6 +86,8 @@ def generate_s3_file_path(
8586 Repository name.
8687 commit : str
8788 Commit hash.
89+ subpath : str
90+ Subpath within the repository; its SHA-256 hash (first 16 hex characters) is embedded in the file name.
8891 include_patterns : set[str] | None
8992 Set of patterns specifying which files to include.
9093 ignore_patterns : set[str]
@@ -111,9 +114,10 @@ def generate_s3_file_path(
111114 patterns_str = f"include:{ sorted (include_patterns ) if include_patterns else []} "
112115 patterns_str += f"exclude:{ sorted (ignore_patterns )} "
113116 patterns_hash = hashlib .sha256 (patterns_str .encode ()).hexdigest ()[:16 ]
117+ subpath_hash = hashlib .sha256 (subpath .encode ()).hexdigest ()[:16 ]
114118
115- # Build the base path using hostname directly
116- base_path = f"ingest/{ hostname } /{ user_name } /{ repo_name } /{ commit } /{ patterns_hash } /{ user_name } - { repo_name } .txt "
119+ file_name = f" { user_name } - { repo_name } - { subpath_hash } .txt"
120+ base_path = f"ingest/{ hostname } /{ user_name } /{ repo_name } /{ commit } /{ patterns_hash } /{ file_name } "
117121
118122 # Check for S3_DIRECTORY_PREFIX environment variable
119123 s3_directory_prefix = os .getenv ("S3_DIRECTORY_PREFIX" )
0 commit comments