@@ -173,20 +173,23 @@ public native int generate(
173173 * @param height Input image height
174174 * @param channels Input image number of channels
175175 * @param startPos The starting position in KV cache of the input in the LLM.
176- * @return The updated starting position in KV cache of the input in the LLM.
176+ * @return Always 0: the updated starting position in the KV cache of the input in the LLM is no
177+ * longer exposed to the user.
177178 * @throws RuntimeException if the prefill failed
178179 */
180+ @ Deprecated
179181 public long prefillImages (int [] image , int width , int height , int channels , long startPos ) {
180- long [] nativeResult = prefillImagesNative (image , width , height , channels , startPos );
181- if (nativeResult [0 ] != 0 ) {
182- throw new RuntimeException ("Prefill failed with error code: " + nativeResult [0 ]);
182+ if (startPos == 0 ) {
183+ resetContext ();
183184 }
184- return nativeResult [1 ];
185+ int nativeResult = appendImagesInput (image , width , height , channels );
186+ if (nativeResult != 0 ) {
187+ throw new RuntimeException ("Prefill failed with error code: " + nativeResult );
188+ }
189+ return 0 ;
185190 }
186191
187- // returns a tuple of (status, updated startPos)
188- private native long [] prefillImagesNative (
189- int [] image , int width , int height , int channels , long startPos );
192+ private native int appendImagesInput (int [] image , int width , int height , int channels );
190193
191194 /**
192195 * Prefill an LLaVA Module with the given text input.
@@ -196,33 +199,48 @@ private native long[] prefillImagesNative(
196199 * reference and will be updated inside this function.
197200 * @param bos The number of BOS (begin of sequence) token.
198201 * @param eos The number of EOS (end of sequence) token.
199- * @return The updated starting position in KV cache of the input in the LLM.
202+ * @return Always 0: the updated starting position in the KV cache of the input in the LLM is no
203+ * longer exposed to the user.
200204 * @throws RuntimeException if the prefill failed
201205 */
206+ @ Deprecated
202207 public long prefillPrompt (String prompt , long startPos , int bos , int eos ) {
203- long [] nativeResult = prefillPromptNative (prompt , startPos , bos , eos );
204- if (nativeResult [0 ] != 0 ) {
205- throw new RuntimeException ("Prefill failed with error code: " + nativeResult [0 ]);
208+ if (startPos == 0 ) {
209+ resetContext ();
206210 }
207- return nativeResult [1 ];
211+ int nativeResult = appendTextInput (prompt , bos , eos );
212+ if (nativeResult != 0 ) {
213+ throw new RuntimeException ("Prefill failed with error code: " + nativeResult );
214+ }
215+ return 0 ;
208216 }
209217
210218 // returns the status code; any non-zero value is reported as a prefill failure
211- private native long [] prefillPromptNative (String prompt , long startPos , int bos , int eos );
219+ private native int appendTextInput (String prompt , int bos , int eos );
212220
213221 /**
214222 * Generate tokens from the given prompt, starting from the given position.
215223 *
224+ * <p>This is a deprecated API. Please use {@link #generate(String, int, LlmCallback, boolean)} instead.
225+ *
216226 * @param prompt The text prompt to LLaVA.
217227 * @param seqLen The total sequence length, including the prompt tokens and new tokens.
218228 * @param startPos The starting position in KV cache of the input in the LLM.
219229 * @param callback callback object to receive results.
220230 * @param echo indicate whether to echo the input prompt or not.
221231 * @return The error code.
222232 */
233+ @ Deprecated
223234 public native int generateFromPos (
224235 String prompt , int seqLen , long startPos , LlmCallback callback , boolean echo );
225236
237+ /**
238+ * Reset the context of the LLM. This will clear the KV cache and reset the state of the LLM.
239+ *
240+ * <p>The startPos will be reset to 0.
241+ */
242+ public native void resetContext ();
243+
226244 /** Stop current generate() before it finishes. */
227245 @ DoNotStrip
228246 public native void stop ();
0 commit comments